From 0e37a511db20dbb05095d92592acabd5a50806dc Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 13:41:56 -0500 Subject: [PATCH 1/6] Add pre-commit utilities --- .pre-commit-config.yaml | 30 +++++++++++ poetry.lock | 115 +++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..dcc8ad9e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +repos: + - repo: /~https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: + - --skip wandb + - repo: /~https://github.com/myint/autoflake + rev: v1.4 + hooks: + - id: autoflake + args: + - -r + - --exclude=wandb + - --in-place + - --remove-unused-variables + - --remove-all-unused-imports + - repo: /~https://github.com/python/black + rev: 21.7b0 + hooks: + - id: black + args: + - --line-length=127 + - --exclude=wandb + - repo: /~https://github.com/codespell-project/codespell + rev: v2.1.0 + hooks: + - id: codespell + args: + - --ignore-words-list=nd,reacher,thist,ths diff --git a/poetry.lock b/poetry.lock index 1fa60d98..f763b7bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -269,6 +269,14 @@ python-versions = "*" [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.3.1" +description = "Validate configuration and produce human readable error messages." +category = "dev" +optional = false +python-versions = ">=3.6.1" + [[package]] name = "chardet" version = "4.0.0" @@ -435,6 +443,14 @@ category = "main" optional = true python-versions = ">=2.7" +[[package]] +name = "distlib" +version = "0.3.4" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "docker-pycreds" version = "0.4.0" @@ -474,6 +490,18 @@ category = "main" optional = true python-versions = ">=2.7" +[[package]] +name = "filelock" +version = "3.4.2" +description = "A platform independent file lock." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"] +testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"] + [[package]] name = "flake8" version = "3.9.2" @@ -584,6 +612,17 @@ other = ["lz4 (>=3.1.0)", "opencv-python (>=3)"] robotics = ["mujoco_py (>=1.50,<2.0)"] toy_text = ["scipy (>=1.4.1)"] +[[package]] +name = "identify" +version = "2.4.5" +description = "File identification library for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.3" @@ -1054,6 +1093,14 @@ category = "main" optional = true python-versions = ">=3.5" +[[package]] +name = "nodeenv" +version = "1.6.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "numpy" version = "1.21.4" @@ -1227,7 +1274,7 @@ name = "platformdirs" version = "2.4.0" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
category = "main" -optional = true +optional = false python-versions = ">=3.6" [package.extras] @@ -1270,6 +1317,23 @@ category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "pre-commit" +version = "2.17.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + [[package]] name = "promise" version = "2.3" @@ -4082,6 +4146,25 @@ brotli = ["brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "virtualenv" +version = "20.13.0" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +distlib = ">=0.3.1,<1" +filelock = ">=3.2,<4" +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +platformdirs = ">=2,<3" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] + [[package]] name = "wandb" version = "0.12.6" @@ -4209,7 +4292,7 @@ spyder = ["spyder"] [metadata] lock-version = "1.1" python-versions = ">=3.7.1,<3.10" -content-hash = "39a9e11b61b9edd0e83ecaf014cb7e773a6970bb9cefe387862ef1fb1e087313" +content-hash = "d3e58d69b86b68980d6f1411987e6effbb4b051a5c94839997685794c1135dcb" [metadata.files] absl-py = [ @@ -4353,6 +4436,10 @@ cffi = [ {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"}, {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"}, ] +cfgv = [ + {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, + {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, +] chardet = [ {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, @@ -4446,6 +4533,10 @@ diff-match-patch = [ {file = "diff-match-patch-20200713.tar.gz", hash = "sha256:da6f5a01aa586df23dfc89f3827e1cafbb5420be9d87769eeb079ddfd9477a18"}, {file = "diff_match_patch-20200713-py3-none-any.whl", hash = "sha256:8bf9d9c4e059d917b5c6312bac0c137971a32815ddbda9c682b949f2986b4d34"}, ] +distlib = [ + {file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"}, + {file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"}, +] docker-pycreds = [ {file = "docker-pycreds-0.4.0.tar.gz", hash = "sha256:6ce3270bcaf404cc4c3e27e4b6c70d3521deae82fb508767870fdbf772d584d4"}, {file = 
"docker_pycreds-0.4.0-py2.py3-none-any.whl", hash = "sha256:7266112468627868005106ec19cd0d722702d2b7d5912a28e19b826c3d37af49"}, @@ -4463,6 +4554,10 @@ entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, ] +filelock = [ + {file = "filelock-3.4.2-py3-none-any.whl", hash = "sha256:cf0fc6a2f8d26bd900f19bf33915ca70ba4dd8c56903eeb14e1e7a2fd7590146"}, + {file = "filelock-3.4.2.tar.gz", hash = "sha256:38b4f4c989f9d06d44524df1b24bd19e167d851f19b50bf3e3559952dddc5b80"}, +] flake8 = [ {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"}, {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, @@ -4532,6 +4627,10 @@ grpcio = [ gym = [ {file = "gym-0.21.0.tar.gz", hash = "sha256:0fd1ce165c754b4017e37a617b097c032b8c3feb8a0394ccc8777c7c50dddff3"}, ] +identify = [ + {file = "identify-2.4.5-py2.py3-none-any.whl", hash = "sha256:d27d10099844741c277b45d809bd452db0d70a9b41ea3cd93799ebbbcc6dcb29"}, + {file = "identify-2.4.5.tar.gz", hash = "sha256:d11469ff952a4d7fd7f9be520d335dc450f585d474b39b5dfb86a500831ab6c7"}, +] idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, @@ -4801,6 +4900,10 @@ nest-asyncio = [ {file = "nest_asyncio-1.5.1-py3-none-any.whl", hash = "sha256:76d6e972265063fe92a90b9cc4fb82616e07d586b346ed9d2c89a4187acea39c"}, {file = "nest_asyncio-1.5.1.tar.gz", hash = "sha256:afc5a1c515210a23c461932765691ad39e8eba6551c055ac8d5546e69250d0aa"}, ] +nodeenv = [ + {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, + {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, +] numpy = [ {file = "numpy-1.21.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8890b3360f345e8360133bc078d2dacc2843b6ee6059b568781b15b97acbe39f"}, {file = "numpy-1.21.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:69077388c5a4b997442b843dbdc3a85b420fb693ec8e33020bb24d647c164fa5"}, @@ -4989,6 +5092,10 @@ poyo = [ {file = "poyo-0.5.0-py2.py3-none-any.whl", hash = "sha256:3e2ca8e33fdc3c411cd101ca395668395dd5dc7ac775b8e809e3def9f9fe041a"}, {file = "poyo-0.5.0.tar.gz", hash = "sha256:e26956aa780c45f011ca9886f044590e2d8fd8b61db7b1c1cf4e0869f48ed4dd"}, ] +pre-commit = [ + {file = "pre_commit-2.17.0-py2.py3-none-any.whl", hash = "sha256:725fa7459782d7bec5ead072810e47351de01709be838c2ce1726b9591dad616"}, + {file = "pre_commit-2.17.0.tar.gz", hash = "sha256:c1a8040ff15ad3d648c70cc3e55b93e4d2d5b687320955505587fd79bbaed06a"}, +] promise = [ {file = "promise-2.3.tar.gz", hash = "sha256:dfd18337c523ba4b6a58801c164c1904a9d4d1b1747c7d5dbf45b693a49d93d0"}, ] @@ -6347,6 +6454,10 @@ urllib3 = [ {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, ] +virtualenv = [ + {file = "virtualenv-20.13.0-py2.py3-none-any.whl", hash = 
"sha256:339f16c4a86b44240ba7223d0f93a7887c3ca04b5f9c8129da7958447d079b09"}, + {file = "virtualenv-20.13.0.tar.gz", hash = "sha256:d8458cf8d59d0ea495ad9b34c2599487f8a7772d796f9910858376d1600dd2dd"}, +] wandb = [ {file = "wandb-0.12.6-py2.py3-none-any.whl", hash = "sha256:a486a697d18ca82e1cde64aa60997a9a37c71af3c6946240bda81a4d61f2bcf4"}, {file = "wandb-0.12.6.tar.gz", hash = "sha256:ad946efc269b25a36b500a831b6bf9ae26b4a695add55e4a53f5b7220e03b177"}, diff --git a/pyproject.toml b/pyproject.toml index c5fd79fe..c31cf1f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ trueskill = "^0.4.5" stable-baselines3 = "^1.1.0" torch = "1.7.1" pytest = "^6.2.5" +pre-commit = "^2.17.0" [tool.poetry-dynamic-versioning] enable = true From e794ac91c785cbe1828ff82b8b3c7d6bb0a30545 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 13:48:34 -0500 Subject: [PATCH 2/6] isort autoflake --- experiments/league.py | 181 +++++++++---------- experiments/ppo_gridnet.py | 40 ++-- experiments/ppo_gridnet_eval.py | 16 +- gym_microrts/envs/vec_env.py | 226 +++++++++++++---------- gym_microrts/microrts_ai.py | 40 +++- gym_microrts/microrts_maps.py | 2 +- hello_world.py | 45 +++-- tests/test_e2e.py | 5 +- tests/test_mask.py | 311 ++++++++++++++++++++++++++------ tests/test_observation.py | 173 ++++++++++++++---- tests/test_reward.py | 50 ++--- 11 files changed, 745 insertions(+), 344 deletions(-) diff --git a/experiments/league.py b/experiments/league.py index 439a4cf5..bb708928 100644 --- a/experiments/league.py +++ b/experiments/league.py @@ -1,38 +1,27 @@ # http://proceedings.mlr.press/v97/han19a/han19a.pdf import argparse +import datetime +import itertools import os import random -import time +import shutil from distutils.util import strtobool +from enum import Enum import numpy as np -import pickle import pandas as pd import torch -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv, MicroRTSBotVecEnv -from gym_microrts import microrts_ai # fmt: off -from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder -from torch.utils.tensorboard import SummaryWriter -from trueskill import TrueSkill, Rating, rate_1vs1, quality_1vs1 +from peewee import (JOIN, CharField, DateTimeField, FloatField, + ForeignKeyField, Model, SmallIntegerField, SqliteDatabase, + fn) from ppo_gridnet import Agent, MicroRTSStatsRecorder -import itertools -from peewee import ( - Model, - SqliteDatabase, - CharField, - ForeignKeyField, - TextField, - DateTimeField, - BooleanField, - FloatField, - SmallIntegerField, - JOIN, - fn, -) -import datetime -from enum import Enum -import shutil +from stable_baselines3.common.vec_env import VecMonitor +from trueskill import Rating, quality_1vs1, rate_1vs1 + +from gym_microrts import microrts_ai # fmt: off +from gym_microrts.envs.vec_env import MicroRTSBotVecEnv, MicroRTSGridModeVecEnv + def parse_args(): # fmt: off @@ -64,33 +53,40 @@ def parse_args(): # fmt: on return args + args = parse_args() dbname = "league" -if(args.partial_obs): - dbname = 'po_league' +if args.partial_obs: + dbname = "po_league" dbpath = f"gym-microrts-static-files/{dbname}.db" csvpath = f"gym-microrts-static-files/{dbname}.csv" db = SqliteDatabase(dbpath) + + class BaseModel(Model): class Meta: database = db + class AI(BaseModel): name = CharField(unique=True) mu = FloatField() sigma = FloatField() ai_type = CharField() + def __str__(self): return f"🤖 {self.name} with N({round(self.mu, 3)}, {round(self.sigma, 3)})" + class MatchHistory(BaseModel): - challenger = ForeignKeyField(AI, 
backref='challenger_match_histories') - defender = ForeignKeyField(AI, backref='defender_match_histories') + challenger = ForeignKeyField(AI, backref="challenger_match_histories") + defender = ForeignKeyField(AI, backref="defender_match_histories") win = SmallIntegerField() draw = SmallIntegerField() loss = SmallIntegerField() created_date = DateTimeField(default=datetime.datetime.now) + db.connect() db.create_tables([AI, MatchHistory]) @@ -100,17 +96,18 @@ class Outcome(Enum): DRAW = 0 LOSS = -1 + class Match: def __init__(self, partial_obs: bool, match_up=None): # mode 0: rl-ai vs built-in-ai # mode 1: rl-ai vs rl-ai # mode 2: built-in-ai vs built-in-ai - built_in_ais=None - built_in_ais2=None - rl_ai=None - rl_ai2=None - + built_in_ais = None + built_in_ais2 = None + rl_ai = None + rl_ai2 = None + # determine mode rl_ais = [] built_in_ais = [] @@ -123,23 +120,23 @@ def __init__(self, partial_obs: bool, match_up=None): mode = 0 p0 = rl_ais[0] p1 = built_in_ais[0] - rl_ai=p0 - built_in_ais=[eval(f"microrts_ai.{p1}")] + rl_ai = p0 + built_in_ais = [eval(f"microrts_ai.{p1}")] elif len(rl_ais) == 2: mode = 1 p0 = rl_ais[0] p1 = rl_ais[1] - rl_ai=p0 - rl_ai2=p1 + rl_ai = p0 + rl_ai2 = p1 else: mode = 2 p0 = built_in_ais[0] p1 = built_in_ais[1] - built_in_ais=[eval(f"microrts_ai.{p0}")] - built_in_ais2=[eval(f"microrts_ai.{p1}")] - + built_in_ais = [eval(f"microrts_ai.{p0}")] + built_in_ais2 = [eval(f"microrts_ai.{p1}")] + self.p0, self.p1 = p0, p1 - + self.mode = mode self.partial_obs = partial_obs self.built_in_ais = built_in_ais @@ -185,7 +182,7 @@ def __init__(self, partial_obs: bool, match_up=None): max_steps=max_steps, render_theme=2, map_paths=["maps/16x16/basesWorkers16x16.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) self.envs = MicroRTSStatsRecorder(self.envs) self.envs = VecMonitor(self.envs) @@ -197,10 +194,10 @@ def run(self, num_matches=7): return self.run_m1(num_matches) else: return self.run_m2(num_matches) - + def run_m0(self, num_matches): results = [] - mapsize = 16 * 16 + 16 * 16 next_obs = torch.Tensor(self.envs.reset()).to(self.device) while True: # self.envs.render() @@ -216,7 +213,7 @@ def run_m0(self, num_matches): except Exception as e: e.printStackTrace() raise - + for idx, info in enumerate(infos): if "episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] @@ -225,19 +222,19 @@ def run_m0(self, num_matches): def run_m1(self, num_matches): results = [] - mapsize = 16 * 16 + 16 * 16 next_obs = torch.Tensor(self.envs.reset()).to(self.device) while True: # self.envs.render() # ALGO LOGIC: put action logic here with torch.no_grad(): mask = torch.tensor(np.array(self.envs.get_action_mask())).to(self.device) - + p1_obs = next_obs[::2] p2_obs = next_obs[1::2] p1_mask = mask[::2] p2_mask = mask[1::2] - + p1_action, _, _, _, _ = self.agent.get_action_and_value( p1_obs, envs=self.envs, invalid_action_masks=p1_mask, device=self.device ) @@ -254,7 +251,7 @@ def run_m1(self, num_matches): except Exception as e: e.printStackTrace() raise - + for idx, info in enumerate(infos): if "episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] @@ -268,28 +265,34 @@ def run_m2(self, num_matches): # self.envs.render() # dummy actions next_obs, reward, done, infos = self.envs.step( - [[[0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0],]]) + [ + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ] + ] + ) for idx, info in enumerate(infos): if 
"episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] if len(results) >= num_matches: return results + def get_ai_type(ai_name): if ai_name[-3:] == ".pt": - return 'rl_ai' + return "rl_ai" else: - return 'built_in_ai' + return "built_in_ai" def get_match_history(ai_name): - query = (MatchHistory - .select( + query = ( + MatchHistory.select( AI.name, - fn.SUM(MatchHistory.win).alias('wins'), - fn.SUM(MatchHistory.draw).alias('draws'), - fn.SUM(MatchHistory.loss).alias('losss'), + fn.SUM(MatchHistory.win).alias("wins"), + fn.SUM(MatchHistory.draw).alias("draws"), + fn.SUM(MatchHistory.loss).alias("losss"), ) .join(AI, JOIN.LEFT_OUTER, on=MatchHistory.defender) .group_by(MatchHistory.defender) @@ -297,50 +300,48 @@ def get_match_history(ai_name): ) return pd.DataFrame(list(query.dicts())) + def get_leaderboard(): - query = (AI.select( - AI.name, - AI.mu, - AI.sigma, - (AI.mu - 3 * AI.sigma).alias('trueskill'), - ) - .order_by((AI.mu - 3 * AI.sigma).desc()) - ) + query = AI.select( + AI.name, + AI.mu, + AI.sigma, + (AI.mu - 3 * AI.sigma).alias("trueskill"), + ).order_by((AI.mu - 3 * AI.sigma).desc()) return pd.DataFrame(list(query.dicts())) + def get_leaderboard_existing_ais(existing_ai_names): - query = (AI.select( + query = ( + AI.select( AI.name, AI.mu, AI.sigma, - (AI.mu - 3 * AI.sigma).alias('trueskill'), + (AI.mu - 3 * AI.sigma).alias("trueskill"), ) .where((AI.name.in_(existing_ai_names))) .order_by((AI.mu - 3 * AI.sigma).desc()) ) return pd.DataFrame(list(query.dicts())) + if __name__ == "__main__": existing_ai_names = [item.name for item in AI.select()] all_ai_names = set(existing_ai_names + args.evals) if not args.update_db: shutil.copyfile(dbpath, f"{dbpath}.backup") - for ai_name in all_ai_names: + for ai_name in all_ai_names: ai = AI.get_or_none(name=ai_name) if ai is None: - ai = AI( - name=ai_name, - mu=25.0, - sigma=8.333333333333334, - ai_type=get_ai_type(ai_name)) + ai = AI(name=ai_name, mu=25.0, sigma=8.333333333333334, ai_type=get_ai_type(ai_name)) ai.save() # case 1: initialize the league with round robin if len(existing_ai_names) == 0: match_ups = list(itertools.combinations(all_ai_names, 2)) np.random.shuffle(match_ups) - for idx in range(2): # switch player 1 and 2's starting locations + for idx in range(2): # switch player 1 and 2's starting locations for match_up in match_ups: if idx == 0: match_up = list(reversed(match_up)) @@ -348,7 +349,7 @@ def get_leaderboard_existing_ais(existing_ai_names): m = Match(args.partial_obs, match_up) challenger = AI.get_or_none(name=m.p0) defender = AI.get_or_none(name=m.p1) - + r = m.run(args.num_matches // 2) for item in r: drawn = False @@ -360,19 +361,18 @@ def get_leaderboard_existing_ais(existing_ai_names): else: winner = defender loser = challenger - + print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}") - + winner_rating, loser_rating = rate_1vs1( - Rating(winner.mu, winner.sigma), - Rating(loser.mu, loser.sigma), - drawn=drawn) + Rating(winner.mu, winner.sigma), Rating(loser.mu, loser.sigma), drawn=drawn + ) winner.mu, winner.sigma = winner_rating.mu, winner_rating.sigma loser.mu, loser.sigma = loser_rating.mu, loser_rating.sigma winner.save() loser.save() - + MatchHistory( challenger=challenger, defender=defender, @@ -397,7 +397,7 @@ def get_leaderboard_existing_ais(existing_ai_names): if ai.name == opponent_ai.name: continue match_qualities += [[opponent_ai, quality_1vs1(ai, opponent_ai)]] - + # sort by quality match_qualities = sorted(match_qualities, key=lambda x: x[1], 
reverse=True) print("match_qualities[:3]", match_qualities[:3]) @@ -408,8 +408,8 @@ def get_leaderboard_existing_ais(existing_ai_names): match_up = (ai.name, opponent_ai.name) match_quality = quality_1vs1(ai, opponent_ai) print(f"the match up is ({ai}, {opponent_ai}), quality is {round(match_quality, 4)}") - winner = ai # dummy setting - for idx in range(2): # switch player 1 and 2's starting locations + winner = ai # dummy setting + for idx in range(2): # switch player 1 and 2's starting locations if idx == 0: match_up = list(reversed(match_up)) m = Match(args.partial_obs, match_up) @@ -430,10 +430,9 @@ def get_leaderboard_existing_ais(existing_ai_names): loser = challenger print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}") winner_rating, loser_rating = rate_1vs1( - Rating(winner.mu, winner.sigma), - Rating(loser.mu, loser.sigma), - drawn=drawn) - + Rating(winner.mu, winner.sigma), Rating(loser.mu, loser.sigma), drawn=drawn + ) + # freeze existing AIs ratings if winner.name == ai.name: ai.mu, ai.sigma = winner_rating.mu, winner_rating.sigma @@ -448,9 +447,9 @@ def get_leaderboard_existing_ais(existing_ai_names): draw=int(item == 0), loss=int(item == -1), ).save() - + get_leaderboard().to_csv(f"{dbname}.temp.csv", index=False) - + print("=======================") print(get_leaderboard()) if not args.update_db: diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py index 95a22c3d..4754c4b3 100644 --- a/experiments/ppo_gridnet.py +++ b/experiments/ppo_gridnet.py @@ -3,8 +3,8 @@ import argparse import os import random -import time import subprocess +import time from distutils.util import strtobool import numpy as np @@ -13,12 +13,14 @@ import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from gym_microrts import microrts_ai -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv -from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder +from stable_baselines3.common.vec_env import (VecEnvWrapper, VecMonitor, + VecVideoRecorder) from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter +from gym_microrts import microrts_ai +from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv + def parse_args(): # fmt: off @@ -84,7 +86,7 @@ def parse_args(): parser.add_argument('--anneal-lr', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True, help="Toggle learning rate annealing for policy and value networks") parser.add_argument('--clip-vloss', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True, - help='Toggles wheter or not to use a clipped loss for the value function, as per the paper.') + help='Toggles whether or not to use a clipped loss for the value function, as per the paper.') parser.add_argument('--num-models', type=int, default=200, help='the number of models saved') @@ -153,7 +155,6 @@ def layer_init(layer, std=np.sqrt(2), bias_const=0.0): return layer - class Agent(nn.Module): def __init__(self, envs, mapsize=16 * 16): super(Agent, self).__init__() @@ -190,7 +191,7 @@ def __init__(self, envs, mapsize=16 * 16): nn.ReLU(), layer_init(nn.Linear(128, 1), std=1), ) - self.register_buffer('mask_value', torch.tensor(-1e8)) + self.register_buffer("mask_value", torch.tensor(-1e8)) def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None): hidden = self.encoder(x) @@ -227,7 +228,6 @@ def get_value(self, x): return self.critic(self.encoder(x)) - if __name__ == "__main__": 
args = parse_args() @@ -307,7 +307,6 @@ def get_value(self, x): ## CRASH AND RESUME LOGIC: starting_update = 1 - from jpype.types import JArray, JInt if args.prod_mode and wandb.run.resumed: starting_update = run.summary.get("charts/update") + 1 @@ -410,7 +409,7 @@ def get_value(self, x): b_values = values.reshape(-1) b_invalid_action_masks = invalid_action_masks.reshape((-1,) + invalid_action_shape) - # Optimizaing the policy and value network + # Optimizing the policy and value network inds = np.arange( args.batch_size, ) @@ -458,13 +457,24 @@ def get_value(self, x): ## CRASH AND RESUME LOGIC: if args.prod_mode: - if (update-1) % args.save_frequency == 0: + if (update - 1) % args.save_frequency == 0: if not os.path.exists(f"models/{experiment_name}"): os.makedirs(f"models/{experiment_name}") torch.save(agent.state_dict(), f"models/{experiment_name}/agent.pt") torch.save(agent.state_dict(), f"models/{experiment_name}/{global_step}.pt") wandb.save(f"models/{experiment_name}/agent.pt", base_path=f"models/{experiment_name}", policy="now") - subprocess.Popen(["python", "league.py", "--evals", f"models/{experiment_name}/{global_step}.pt", "--update-db", "false", "--cuda", "false"]) + subprocess.Popen( + [ + "python", + "league.py", + "--evals", + f"models/{experiment_name}/{global_step}.pt", + "--update-db", + "false", + "--cuda", + "false", + ] + ) eval_queue += [f"models/{experiment_name}/{global_step}.pt"] print(f"Evaluating models/{experiment_name}/{global_step}.pt") @@ -484,8 +494,8 @@ def get_value(self, x): trueskill_data = { "name": league.loc[model_path].name, "mu": league.loc[model_path]["mu"], - "sigma":league.loc[model_path]["sigma"], - "trueskill": league.loc[model_path]["trueskill"] + "sigma": league.loc[model_path]["sigma"], + "trueskill": league.loc[model_path]["trueskill"], } trueskill_df = trueskill_df.append(trueskill_data, ignore_index=True) wandb.log({"trueskill": wandb.Table(dataframe=trueskill_df)}) @@ -494,7 +504,7 @@ def get_value(self, x): trueskill_step_df = trueskill_step_df.append(trueskill_data, ignore_index=True) preset_trueskill_step_df_clone = preset_trueskill_step_df.copy() preset_trueskill_step_df_clone["step"] = model_global_step - trueskill_step_df = trueskill_step_df.append(preset_trueskill_step_df_clone, ignore_index=True) + trueskill_step_df = trueskill_step_df.append(preset_trueskill_step_df_clone, ignore_index=True) wandb.log({"trueskill_step": wandb.Table(dataframe=trueskill_step_df)}) # TRY NOT TO MODIFY: record rewards for plotting purposes diff --git a/experiments/ppo_gridnet_eval.py b/experiments/ppo_gridnet_eval.py index 8ef98265..392e2890 100644 --- a/experiments/ppo_gridnet_eval.py +++ b/experiments/ppo_gridnet_eval.py @@ -8,15 +8,14 @@ import numpy as np import torch -import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from gym_microrts import microrts_ai -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv -from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder -from torch.distributions.categorical import Categorical -from torch.utils.tensorboard import SummaryWriter from ppo_gridnet import Agent, MicroRTSStatsRecorder +from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder +from torch.utils.tensorboard import SummaryWriter + +from gym_microrts import microrts_ai # noqa +from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv def parse_args(): @@ -101,7 +100,6 @@ def parse_args(): torch.manual_seed(args.seed) 
torch.backends.cudnn.deterministic = args.torch_deterministic - ais = [] if args.ai: ais = [eval(f"microrts_ai.{args.ai}")] @@ -163,7 +161,7 @@ def parse_args(): # ALGO LOGIC: put action logic here with torch.no_grad(): invalid_action_masks[step] = torch.tensor(np.array(envs.get_action_mask())).to(device) - + if args.ai: action, logproba, _, _, vs = agent.get_action_and_value( next_obs, envs=envs, invalid_action_masks=invalid_action_masks[step], device=device @@ -173,7 +171,7 @@ def parse_args(): p2_obs = next_obs[1::2] p1_mask = invalid_action_masks[step][::2] p2_mask = invalid_action_masks[step][1::2] - + p1_action, _, _, _, _ = agent.get_action_and_value( p1_obs, envs=envs, invalid_action_masks=p1_mask, device=device ) diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py index 2b183fe7..2a509ae2 100644 --- a/gym_microrts/envs/vec_env.py +++ b/gym_microrts/envs/vec_env.py @@ -1,23 +1,20 @@ - -import os import json +import os import xml.etree.ElementTree as ET -import numpy as np -from PIL import Image import gym -import gym_microrts - import jpype -from jpype.imports import registerDomain import jpype.imports +import numpy as np +from jpype.imports import registerDomain from jpype.types import JArray, JInt +from PIL import Image + +import gym_microrts + class MicroRTSGridModeVecEnv: - metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second' : 150 - } + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 150} """ [[0]x_coordinate*y_coordinate(x*y), [1]a_t(6), [2]p_move(4), [3]p_harvest(4), [4]p_return(4), [5]p_produce_direction(4), [6]p_produce_unit_type(z), @@ -26,7 +23,8 @@ class MicroRTSGridModeVecEnv: :param env: gym3 environment to adapt """ - def __init__(self, + def __init__( + self, num_selfplay_envs, num_bot_envs, partial_obs=False, @@ -35,7 +33,8 @@ def __init__(self, frame_skip=0, ai2s=[], map_paths=["maps/10x10/basesTwoWorkers10x10.xml"], - reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0])): + reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]), + ): self.num_selfplay_envs = num_selfplay_envs self.num_bot_envs = num_bot_envs @@ -50,11 +49,13 @@ def __init__(self, if len(map_paths) == 1: self.map_paths = [map_paths[0] for _ in range(self.num_envs)] else: - assert len(map_paths) == self.num_envs, "if multiple maps are provided, they should be provided for each environment" + assert ( + len(map_paths) == self.num_envs + ), "if multiple maps are provided, they should be provided for each environment" self.reward_weight = reward_weight # read map - self.microrts_path = os.path.join(gym_microrts.__path__[0], 'microrts') + self.microrts_path = os.path.join(gym_microrts.__path__[0], "microrts") root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot() self.height, self.width = int(root.get("height")), int(root.get("width")) @@ -63,9 +64,15 @@ def __init__(self, registerDomain("ts", alias="tests") registerDomain("ai") jars = [ - "microrts.jar", "lib/bots/Coac.jar", "lib/bots/Droplet.jar", "lib/bots/GRojoA3N.jar", - "lib/bots/Izanagi.jar", "lib/bots/MixedBot.jar", "lib/bots/TiamatBot.jar", "lib/bots/UMSBot.jar", - "lib/bots/mayariBot.jar" # "MindSeal.jar" + "microrts.jar", + "lib/bots/Coac.jar", + "lib/bots/Droplet.jar", + "lib/bots/GRojoA3N.jar", + "lib/bots/Izanagi.jar", + "lib/bots/MixedBot.jar", + "lib/bots/TiamatBot.jar", + "lib/bots/UMSBot.jar", + "lib/bots/mayariBot.jar", # "MindSeal.jar" ] for jar in jars: jpype.addClassPath(os.path.join(self.microrts_path, jar)) @@ -73,47 
+80,56 @@ def __init__(self, # start microrts client from rts.units import UnitTypeTable + self.real_utt = UnitTypeTable() - from ai.rewardfunction import RewardFunctionInterface, WinLossRewardFunction, ResourceGatherRewardFunction, AttackRewardFunction, ProduceWorkerRewardFunction, ProduceBuildingRewardFunction, ProduceCombatUnitRewardFunction, CloserToEnemyBaseRewardFunction - self.rfs = JArray(RewardFunctionInterface)([ - WinLossRewardFunction(), - ResourceGatherRewardFunction(), - ProduceWorkerRewardFunction(), - ProduceBuildingRewardFunction(), - AttackRewardFunction(), - ProduceCombatUnitRewardFunction(), - # CloserToEnemyBaseRewardFunction(), - ]) + from ai.rewardfunction import (AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction) + + self.rfs = JArray(RewardFunctionInterface)( + [ + WinLossRewardFunction(), + ResourceGatherRewardFunction(), + ProduceWorkerRewardFunction(), + ProduceBuildingRewardFunction(), + AttackRewardFunction(), + ProduceCombatUnitRewardFunction(), + # CloserToEnemyBaseRewardFunction(), + ] + ) self.start_client() # computed properties - # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), + # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), # num_planes_unit_type(z), num_planes_unit_action(6)] - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6] if partial_obs: - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6, 2] - self.observation_space = gym.spaces.Box(low=0.0, - high=1.0, - shape=(self.height, self.width, - sum(self.num_planes)), - dtype=np.int32) + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] + self.observation_space = gym.spaces.Box( + low=0.0, high=1.0, shape=(self.height, self.width, sum(self.num_planes)), dtype=np.int32 + ) self.num_planes_len = len(self.num_planes) self.num_planes_prefix_sum = [0] for num_plane in self.num_planes: self.num_planes_prefix_sum.append(self.num_planes_prefix_sum[-1] + num_plane) - self.action_space_dims = [6, 4, 4, 4, 4, len(self.utt['unitTypes']), 7 * 7] + self.action_space_dims = [6, 4, 4, 4, 4, len(self.utt["unitTypes"]), 7 * 7] self.action_space = gym.spaces.MultiDiscrete(np.array([self.action_space_dims] * self.height * self.width).flatten()) self.action_plane_space = gym.spaces.MultiDiscrete(self.action_space_dims) - self.source_unit_idxs = np.tile(np.arange(self.height*self.width), (self.num_envs,1)) + self.source_unit_idxs = np.tile(np.arange(self.height * self.width), (self.num_envs, 1)) self.source_unit_idxs = self.source_unit_idxs.reshape((self.source_unit_idxs.shape + (1,))) - + def start_client(self): - from ts import JNIGridnetVecClient as Client from ai.core import AI + from ts import JNIGridnetVecClient as Client + self.vec_client = Client( self.num_selfplay_envs, self.num_bot_envs, @@ -125,34 +141,36 @@ def start_client(self): self.real_utt, self.partial_obs, ) - self.render_client = self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + self.render_client = ( + self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + ) # get the unit type table self.utt = json.loads(str(self.render_client.sendUTT())) def reset(self): - responses = self.vec_client.reset([0]*self.num_envs) + responses = 
self.vec_client.reset([0] * self.num_envs) obs = [self._encode_obs(np.array(ro)) for ro in responses.observation] return np.array(obs) def _encode_obs(self, obs): - obs = obs.reshape(len(obs), -1).clip(0, np.array([self.num_planes]).T-1) + obs = obs.reshape(len(obs), -1).clip(0, np.array([self.num_planes]).T - 1) obs_planes = np.zeros((self.height * self.width, self.num_planes_prefix_sum[-1]), dtype=np.int32) obs_planes_idx = np.arange(len(obs_planes)) - obs_planes[obs_planes_idx,obs[0]] = 1 + obs_planes[obs_planes_idx, obs[0]] = 1 for i in range(1, self.num_planes_len): - obs_planes[obs_planes_idx,obs[i]+self.num_planes_prefix_sum[i]] = 1 + obs_planes[obs_planes_idx, obs[i] + self.num_planes_prefix_sum[i]] = 1 return obs_planes.reshape(self.height, self.width, -1) def step_async(self, actions): - actions = actions.reshape((self.num_envs, self.width*self.height, -1)) - actions = np.concatenate((self.source_unit_idxs, actions), 2) # specify source unit - actions = actions[np.where(self.source_unit_mask==1)] # valid actions + actions = actions.reshape((self.num_envs, self.width * self.height, -1)) + actions = np.concatenate((self.source_unit_idxs, actions), 2) # specify source unit + actions = actions[np.where(self.source_unit_mask == 1)] # valid actions action_counts_per_env = self.source_unit_mask.sum(1) - java_actions = [None]*len(action_counts_per_env) + java_actions = [None] * len(action_counts_per_env) action_idx = 0 for outer_idx, action_count in enumerate(action_counts_per_env): - java_valid_action = [None]*action_count + java_valid_action = [None] * action_count for idx in range(action_count): java_valid_action[idx] = JArray(JInt)(actions[action_idx]) action_idx += 1 @@ -160,11 +178,11 @@ def step_async(self, actions): self.actions = JArray(JArray(JArray(JInt)))(java_actions) def step_wait(self): - responses = self.vec_client.gameStep(self.actions, [0]*self.num_envs) + responses = self.vec_client.gameStep(self.actions, [0] * self.num_envs) reward, done = np.array(responses.reward), np.array(responses.done) obs = [self._encode_obs(np.array(ro)) for ro in responses.observation] infos = [{"raw_rewards": item} for item in reward] - return np.array(obs), reward @ self.reward_weight, done[:,0], infos + return np.array(obs), reward @ self.reward_weight, done[:, 0], infos def step(self, ac): self.step_async(ac) @@ -184,10 +202,10 @@ def getattr_depth_check(self, name, already_found): def render(self, mode="human"): if mode == "human": self.render_client.render(False) - elif mode == 'rgb_array': + elif mode == "rgb_array": bytes_array = np.array(self.render_client.render(True)) image = Image.frombytes("RGB", (640, 640), bytes_array) - return np.array(image)[:,:,::-1] + return np.array(image)[:, :, ::-1] def close(self): if jpype._jpype.isStarted(): @@ -196,25 +214,24 @@ def close(self): def get_action_mask(self): action_mask = np.array(self.vec_client.getMasks(0)) - self.source_unit_mask = action_mask[:,:,:,0].reshape(self.num_envs, -1) - action_type_and_parameter_mask = action_mask[:,:,:,1:].reshape(self.num_envs, self.height*self.width, -1) + self.source_unit_mask = action_mask[:, :, :, 0].reshape(self.num_envs, -1) + action_type_and_parameter_mask = action_mask[:, :, :, 1:].reshape(self.num_envs, self.height * self.width, -1) return action_type_and_parameter_mask class MicroRTSBotVecEnv(MicroRTSGridModeVecEnv): - metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second' : 150 - } + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 150} 
- def __init__(self, + def __init__( + self, ai1s=[], ai2s=[], partial_obs=False, max_steps=2000, render_theme=2, map_paths="maps/10x10/basesTwoWorkers10x10.xml", - reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0])): + reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]), + ): self.ai1s = ai1s self.ai2s = ai2s @@ -227,7 +244,7 @@ def __init__(self, self.reward_weight = reward_weight # read map - self.microrts_path = os.path.join(gym_microrts.__path__[0], 'microrts') + self.microrts_path = os.path.join(gym_microrts.__path__[0], "microrts") root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot() self.height, self.width = int(root.get("height")), int(root.get("width")) @@ -237,9 +254,15 @@ def __init__(self, registerDomain("ai") registerDomain("rts") jars = [ - "microrts.jar", "lib/bots/Coac.jar", "lib/bots/Droplet.jar", "lib/bots/GRojoA3N.jar", - "lib/bots/Izanagi.jar", "lib/bots/MixedBot.jar", "lib/bots/TiamatBot.jar", "lib/bots/UMSBot.jar", - "lib/bots/mayariBot.jar" # "MindSeal.jar" + "microrts.jar", + "lib/bots/Coac.jar", + "lib/bots/Droplet.jar", + "lib/bots/GRojoA3N.jar", + "lib/bots/Izanagi.jar", + "lib/bots/MixedBot.jar", + "lib/bots/TiamatBot.jar", + "lib/bots/UMSBot.jar", + "lib/bots/mayariBot.jar", # "MindSeal.jar" ] for jar in jars: jpype.addClassPath(os.path.join(self.microrts_path, jar)) @@ -247,33 +270,44 @@ def __init__(self, # start microrts client from rts.units import UnitTypeTable + self.real_utt = UnitTypeTable() - from ai.rewardfunction import RewardFunctionInterface, WinLossRewardFunction, ResourceGatherRewardFunction, AttackRewardFunction, ProduceWorkerRewardFunction, ProduceBuildingRewardFunction, ProduceCombatUnitRewardFunction, CloserToEnemyBaseRewardFunction - self.rfs = JArray(RewardFunctionInterface)([ - WinLossRewardFunction(), - ResourceGatherRewardFunction(), - ProduceWorkerRewardFunction(), - ProduceBuildingRewardFunction(), - AttackRewardFunction(), - ProduceCombatUnitRewardFunction(), - # CloserToEnemyBaseRewardFunction(), - ]) + from ai.rewardfunction import (AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction) + + self.rfs = JArray(RewardFunctionInterface)( + [ + WinLossRewardFunction(), + ResourceGatherRewardFunction(), + ProduceWorkerRewardFunction(), + ProduceBuildingRewardFunction(), + AttackRewardFunction(), + ProduceCombatUnitRewardFunction(), + # CloserToEnemyBaseRewardFunction(), + ] + ) self.start_client() # computed properties - # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), + # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), # num_planes_unit_type(z), num_planes_unit_action(6)] - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6] if partial_obs: - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6, 2] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] self.observation_space = gym.spaces.Discrete(2) self.action_space = gym.spaces.Discrete(2) def start_client(self): - from ts import JNIGridnetVecClient as Client from ai.core import AI + from ts import JNIGridnetVecClient as Client + self.vec_client = Client( self.max_steps, self.rfs, @@ -290,7 +324,7 @@ def start_client(self): def reset(self): responses = self.vec_client.reset([0 for _ in range(self.num_envs)]) - raw_obs, reward, done, info = np.ones((self.num_envs,2)), 
np.array(responses.reward), np.array(responses.done), {} + raw_obs, reward, done, info = np.ones((self.num_envs, 2)), np.array(responses.reward), np.array(responses.done), {} return raw_obs def step_async(self, actions): @@ -298,9 +332,9 @@ def step_async(self, actions): def step_wait(self): responses = self.vec_client.gameStep(self.actions, [0 for _ in range(self.num_envs)]) - raw_obs, reward, done = np.ones((self.num_envs,2)), np.array(responses.reward), np.array(responses.done) + raw_obs, reward, done = np.ones((self.num_envs, 2)), np.array(responses.reward), np.array(responses.done) infos = [{"raw_rewards": item} for item in reward] - return raw_obs, reward @ self.reward_weight, done[:,0], infos + return raw_obs, reward @ self.reward_weight, done[:, 0], infos def step(self, ac): self.step_async(ac) @@ -320,10 +354,10 @@ def getattr_depth_check(self, name, already_found): def render(self, mode="human"): if mode == "human": self.render_client.render(False) - elif mode == 'rgb_array': + elif mode == "rgb_array": bytes_array = np.array(self.render_client.render(True)) image = Image.frombytes("RGB", (640, 640), bytes_array) - return np.array(image)[:,:,::-1] + return np.array(image)[:, :, ::-1] def close(self): if jpype._jpype.isStarted(): @@ -377,13 +411,13 @@ def _allocate_shared_buffer(self, nbytes): def start_client(self): - from ts import JNIGridnetSharedMemVecClient as Client from ai.core import AI from rts import GameState + from ts import JNIGridnetSharedMemVecClient as Client self.num_feature_planes = GameState.numFeaturePlanes num_unit_types = len(self.real_utt.getUnitTypes()) - self.action_space_dims = [6, 4, 4, 4, 4, num_unit_types, (self.real_utt.getMaxAttackRange()*2+1)**2] + self.action_space_dims = [6, 4, 4, 4, 4, num_unit_types, (self.real_utt.getMaxAttackRange() * 2 + 1) ** 2] self.masks_dim = sum(self.action_space_dims) self.action_dim = len(self.action_space_dims) @@ -394,11 +428,11 @@ def start_client(self): action_mask_nbytes = self.num_envs * self.height * self.width * self.masks_dim * 4 action_mask_jvm_buffer, action_mask_np_buffer = self._allocate_shared_buffer(action_mask_nbytes) - self.action_mask = action_mask_np_buffer.reshape((self.num_envs, self.height*self.width, self.masks_dim)) + self.action_mask = action_mask_np_buffer.reshape((self.num_envs, self.height * self.width, self.masks_dim)) - action_nbytes = self.num_envs * self.width*self.height * self.action_dim * 4 + action_nbytes = self.num_envs * self.width * self.height * self.action_dim * 4 action_jvm_buffer, action_np_buffer = self._allocate_shared_buffer(action_nbytes) - self.actions = action_np_buffer.reshape((self.num_envs, self.height*self.width, self.action_dim)) + self.actions = action_np_buffer.reshape((self.num_envs, self.height * self.width, self.action_dim)) self.vec_client = Client( self.num_selfplay_envs, @@ -415,23 +449,25 @@ def start_client(self): action_jvm_buffer, 0, ) - self.render_client = self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + self.render_client = ( + self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + ) # get the unit type table self.utt = json.loads(str(self.render_client.sendUTT())) def reset(self): - self.vec_client.reset([0]*self.num_envs) + self.vec_client.reset([0] * self.num_envs) return self.obs def step_async(self, actions): - actions = actions.reshape((self.num_envs, self.width*self.height, self.action_dim)) + actions = 
actions.reshape((self.num_envs, self.width * self.height, self.action_dim)) np.copyto(self.actions, actions) def step_wait(self): - responses = self.vec_client.gameStep([0]*self.num_envs) + responses = self.vec_client.gameStep([0] * self.num_envs) reward, done = np.array(responses.reward), np.array(responses.done) infos = [{"raw_rewards": item} for item in reward] - return self.obs, reward @ self.reward_weight, done[:,0], infos + return self.obs, reward @ self.reward_weight, done[:, 0], infos def get_action_mask(self): self.vec_client.getMasks(0) diff --git a/gym_microrts/microrts_ai.py b/gym_microrts/microrts_ai.py index cf2be398..7b93e1c5 100644 --- a/gym_microrts/microrts_ai.py +++ b/gym_microrts/microrts_ai.py @@ -1,78 +1,114 @@ def randomBiasedAI(utt): from ai import RandomBiasedAI + return RandomBiasedAI() + def randomAI(utt): from ai import RandomBiasedSingleUnitAI + return RandomBiasedSingleUnitAI() + def passiveAI(utt): from ai import PassiveAI + return PassiveAI() + def workerRushAI(utt): from ai.abstraction import WorkerRush + return WorkerRush(utt) + def lightRushAI(utt): from ai.abstraction import LightRush + return LightRush(utt) + def POLightRush(utt): from ai.abstraction.partialobservability import POLightRush + return POLightRush(utt) + def POWorkerRush(utt): from ai.abstraction.partialobservability import POWorkerRush + return POWorkerRush(utt) + def POHeavyRush(utt): from ai.abstraction.partialobservability import POHeavyRush + return POHeavyRush(utt) + def PORangedRush(utt): from ai.abstraction.partialobservability import PORangedRush + return PORangedRush(utt) + + # Competition AIs + def coacAI(utt): from ai.coac import CoacAI + return CoacAI(utt) + def naiveMCTSAI(utt): from ai.mcts.naivemcts import NaiveMCTS + return NaiveMCTS(utt) + # /~https://github.com/AmoyZhp/MixedBotmRTS def mixedBot(utt): from ai.JZ import MixedBot + return MixedBot(utt) + # /~https://github.com/jr9Hernandez/RojoBot def rojo(utt): from ai.competition.rojobot import Rojo + return Rojo(utt) + # /~https://github.com/rubensolv/IzanagiBot def izanagi(utt): from ai.competition.IzanagiBot import Izanagi + return Izanagi(utt) + # /~https://github.com/jr9Hernandez/TiamatBot def tiamat(utt): from ai.competition.tiamat import Tiamat + return Tiamat(utt) + # /~https://github.com/zuozhiyang/Droplet/blob/master/GNS/Droplet.java def droplet(utt): from GNS import Droplet + return Droplet(utt) + # /~https://github.com/barvazkrav/mayariBot/blob/master/mayari.java def mayari(utt): from mayariBot import mayari + return mayari(utt) + # # /~https://github.com/zuozhiyang/MentalSeal # def mentalSeal(utt): # from MentalSeal import MentalSeal @@ -81,8 +117,10 @@ def mayari(utt): # /~https://github.com/rubensolv/GRojoA3N def guidedRojoA3N(utt): from ai.competition.GRojoA3N import GuidedRojoA3N + return GuidedRojoA3N(utt) + ALL_AIS = [ randomBiasedAI, randomAI, @@ -91,4 +129,4 @@ def guidedRojoA3N(utt): lightRushAI, coacAI, naiveMCTSAI, -] \ No newline at end of file +] diff --git a/gym_microrts/microrts_maps.py b/gym_microrts/microrts_maps.py index cfa6f477..34a2f2e9 100644 --- a/gym_microrts/microrts_maps.py +++ b/gym_microrts/microrts_maps.py @@ -18,4 +18,4 @@ "maps/16x16/basesWorkers16x16H.xml", "maps/16x16/basesWorkers16x16L.xml", "maps/16x16/EightBasesWorkers16x16.xml", -] \ No newline at end of file +] diff --git a/hello_world.py b/hello_world.py index dac3e8a0..999e9847 100644 --- a/hello_world.py +++ b/hello_world.py @@ -1,11 +1,13 @@ import numpy as np from numpy.random import choice -# if you want to record 
videos, install stable-baselines3 and use its `VecVideoRecorder` -# from stable_baselines3.common.vec_env import VecVideoRecorder from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv +# if you want to record videos, install stable-baselines3 and use its `VecVideoRecorder` +# from stable_baselines3.common.vec_env import VecVideoRecorder + + envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, num_bot_envs=1, @@ -13,29 +15,33 @@ render_theme=2, ai2s=[microrts_ai.coacAI for _ in range(1)], map_paths=["maps/16x16/basesWorkers16x16.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) # envs = VecVideoRecorder(envs, 'videos', record_video_trigger=lambda x: x % 4000 == 0, video_length=2000) + def softmax(x): """Compute softmax values for each sets of scores in x.""" e_x = np.exp(x - np.max(x)) return e_x / e_x.sum() + def sample(logits): # sample 1 or 2 from logits [0, 1 ,1, 0] but not 0 or 3 - if sum(logits) == 0: return 0 - return choice(range(len(logits)), p=logits/sum(logits)) + if sum(logits) == 0: + return 0 + return choice(range(len(logits)), p=logits / sum(logits)) + envs.action_space.seed(0) envs.reset() print(envs.action_plane_space.nvec) nvec = envs.action_space.nvec + def sample(logits): - return np.array( - [choice(range(len(item)), p=softmax(item)) for item in logits] - ).reshape(-1, 1) + return np.array([choice(range(len(item)), p=softmax(item)) for item in logits]).reshape(-1, 1) + for i in range(10000): envs.render() @@ -45,16 +51,19 @@ def sample(logits): # but we want to remove PyTorch as a core dependency... action_mask = envs.get_action_mask() action_mask = action_mask.reshape(-1, action_mask.shape[-1]) - action_type_mask = action_mask[:,0:6] - action = np.concatenate(( - sample(action_mask[:,0:6]), # action type - sample(action_mask[:,6:10]), # move parameter - sample(action_mask[:,10:14]), # harvest parameter - sample(action_mask[:,14:18]), # return parameter - sample(action_mask[:,18:22]), # produce_direction parameter - sample(action_mask[:,22:29]), # produce_unit_type parameter - sample(action_mask[:,29:sum(envs.action_space.nvec[1:])]), # attack_target parameter - ), axis=1) + action_type_mask = action_mask[:, 0:6] + action = np.concatenate( + ( + sample(action_mask[:, 0:6]), # action type + sample(action_mask[:, 6:10]), # move parameter + sample(action_mask[:, 10:14]), # harvest parameter + sample(action_mask[:, 14:18]), # return parameter + sample(action_mask[:, 18:22]), # produce_direction parameter + sample(action_mask[:, 22:29]), # produce_unit_type parameter + sample(action_mask[:, 29 : sum(envs.action_space.nvec[1:])]), # attack_target parameter + ), + axis=1, + ) action = np.array([envs.action_space.sample()]) next_obs, reward, done, info = envs.step(action) envs.close() diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 3ab9b22c..4abeff36 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -9,12 +9,11 @@ def test_ppo_gridnet(): shell=True, check=True, ) - except subprocess.CalledProcessError as grepexc: + except subprocess.CalledProcessError as grepexc: print("error code", grepexc.returncode, grepexc.output) assert grepexc.returncode in [0, 134] - def test_ppo_gridnet_eval_selfplay(): try: subprocess.run( @@ -22,7 +21,7 @@ def test_ppo_gridnet_eval_selfplay(): shell=True, check=True, ) - except subprocess.CalledProcessError as grepexc: + except subprocess.CalledProcessError as grepexc: print("error code", grepexc.returncode, 
grepexc.output) assert grepexc.returncode in [0, 134] diff --git a/tests/test_mask.py b/tests/test_mask.py index 414865f2..0d74fef3 100644 --- a/tests/test_mask.py +++ b/tests/test_mask.py @@ -1,11 +1,11 @@ import numpy as np - from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_mask(): envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, @@ -14,70 +14,271 @@ def test_mask(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() + envs.reset() # if render: envs.render() except Exception as e: e.printStackTrace() - num_planes = len(envs.action_plane_space.nvec) + len(envs.action_plane_space.nvec) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,1], - np.array([ - 1, 1, 1, 0, 1, 0, - 0, 1, 0, 0, - 0, 0, 0, 1, - 0, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 1], + np.array( + [ + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,4], - np.array([ - 1, 1, 1, 0, 1, 0, - 0, 0, 1, 0, - 1, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 4], + np.array( + [ + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,5], - np.array([ - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 1, 1, 0, - 0, 0, 0, 1, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 5], + np.array( + [ + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) diff --git a/tests/test_observation.py b/tests/test_observation.py index b7eaf5f0..23baeee9 100644 --- a/tests/test_observation.py +++ b/tests/test_observation.py @@ -1,11 +1,10 @@ import numpy as np - -from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_observation(): envs = MicroRTSGridModeVecEnv( num_bot_envs=0, @@ -19,38 +18,146 @@ def test_observation(): ) next_obs = envs.reset() - resource = np.array([ - 0., 1., 0., 0., 0., # 1 hp - 0., 0., 0., 0., 1., # >= 4 resources - 1., 0., 0., # no owner - 0., 1., 0., 0., 0., 0., 0., 0., # unit type resource - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) - p1_worker = np.array([ - 0., 1., 0., 0., 0., # 1 hp - 1., 0., 0., 0., 0., # 0 resources - 0., 1., 0., # player 1 owns it - 0., 0., 0., 0., 1., 0., 0., 0., # unit type worker - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) - p1_base = np.array([ - 0., 0., 0., 0., 1., # 1 hp - 1., 0., 0., 0., 0., # 0 resources - 0., 1., 0., # player 1 owns it - 0., 0., 1., 0., 0., 0., 0., 0., # unit type base - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) + resource = np.array( + [ + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # 1 hp + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, # >= 4 resources + 1.0, + 0.0, + 0.0, # no owner + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type resource + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) + p1_worker = np.array( + [ + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # 1 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 0.0, + 1.0, + 0.0, # player 1 owns it + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # unit type worker + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) + p1_base = np.array( + [ + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, # 1 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 0.0, + 1.0, + 0.0, # player 1 owns it + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type base + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) p2_worker = p1_worker.copy() - p2_worker[10:13] = np.array([0., 0., 1.,]) # player 2 owns it + p2_worker[10:13] = np.array( + [ + 0.0, + 0.0, + 1.0, + ] + ) # player 2 owns it p2_base = p1_base.copy() - p2_base[10:13] = np.array([0., 0., 1.,]) # player 2 owns it - empty_cell = np.array([ - 1., 0., 0., 0., 0., # 0 hp - 1., 0., 0., 0., 0., # 0 resources - 1., 0., 0., # no owner - 1., 0., 0., 0., 0., 0., 0., 0., # unit type empty cell - 1., 0., 0., 0., 0., 0. 
# currently not executing actions - ]).astype(np.int32) + p2_base[10:13] = np.array( + [ + 0.0, + 0.0, + 1.0, + ] + ) # player 2 owns it + empty_cell = np.array( + [ + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 1.0, + 0.0, + 0.0, # no owner + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type empty cell + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) # player 1's perspective np.testing.assert_array_equal(next_obs[0][0][0], resource) @@ -72,11 +179,9 @@ def test_observation(): # np.testing.assert_array_equal(next_obs[1][14][15], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0] np.testing.assert_array_equal(next_obs[1][14][14], p1_worker) np.testing.assert_array_equal(next_obs[1][13][13], p1_base) - feature_sum = 0 for item in [resource, resource, p1_worker, p1_base, resource, resource, p2_worker, p2_base]: feature_sum += item.sum() feature_sum += empty_cell.sum() * (256 - 8) assert next_obs.sum() == feature_sum * 2 == 2560.0 - diff --git a/tests/test_reward.py b/tests/test_reward.py index f1de2f6e..90314c59 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -1,11 +1,11 @@ import numpy as np - from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_reward(): envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, @@ -14,37 +14,41 @@ def test_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # mine np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[1*num_planes:(1+1)*num_planes] = [2, 0, 3, 0, 0, 0, 0] + action[1 * num_planes : (1 + 1) * num_planes] = [2, 0, 3, 0, 0, 0, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() # wait for action to finish for _ in range(20): np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) envs.step(action) - if render: envs.render() + if render: + envs.render() # return np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[1*num_planes:(1+1)*num_planes] = [3, 0, 0, 2, 0, 0, 0] + action[1 * num_planes : (1 + 1) * num_planes] = [3, 0, 0, 2, 0, 0, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() + def test_produce_worker_reward(): envs = MicroRTSGridModeVecEnv( @@ -54,23 +58,24 @@ def test_produce_worker_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # produce a worker np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[5*num_planes:(5+1)*num_planes] = [4, 0, 0, 0, 1, 3, 
0] + action[5 * num_planes : (5 + 1) * num_planes] = [4, 0, 0, 0, 1, 3, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() def test_produce_buildings_reward(): @@ -81,20 +86,21 @@ def test_produce_buildings_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # produce a worker np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[4*num_planes:(4+1)*num_planes] = [4, 0, 0, 0, 2, 2, 0] + action[4 * num_planes : (4 + 1) * num_planes] = [4, 0, 0, 0, 2, 2, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() From a75b3058ac240da6fa8031422bea3d5ef352b3c8 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 15:04:51 -0500 Subject: [PATCH 3/6] add black profile --- .pre-commit-config.yaml | 3 ++- experiments/league.py | 14 +++++++++++--- experiments/ppo_gridnet.py | 3 +-- gym_microrts/envs/vec_env.py | 32 ++++++++++++++++++-------------- pyproject.toml | 2 +- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dcc8ad9e..0f8d0b76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,8 @@ repos: hooks: - id: isort args: - - --skip wandb + - --profile=black + - --skip=wandb - repo: /~https://github.com/myint/autoflake rev: v1.4 hooks: diff --git a/experiments/league.py b/experiments/league.py index bb708928..aa315a94 100644 --- a/experiments/league.py +++ b/experiments/league.py @@ -12,9 +12,17 @@ import numpy as np import pandas as pd import torch -from peewee import (JOIN, CharField, DateTimeField, FloatField, - ForeignKeyField, Model, SmallIntegerField, SqliteDatabase, - fn) +from peewee import ( + JOIN, + CharField, + DateTimeField, + FloatField, + ForeignKeyField, + Model, + SmallIntegerField, + SqliteDatabase, + fn, +) from ppo_gridnet import Agent, MicroRTSStatsRecorder from stable_baselines3.common.vec_env import VecMonitor from trueskill import Rating, quality_1vs1, rate_1vs1 diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py index 4754c4b3..4e8945f1 100644 --- a/experiments/ppo_gridnet.py +++ b/experiments/ppo_gridnet.py @@ -13,8 +13,7 @@ import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from stable_baselines3.common.vec_env import (VecEnvWrapper, VecMonitor, - VecVideoRecorder) +from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py index 2a509ae2..5bc1ec5f 100644 --- a/gym_microrts/envs/vec_env.py +++ b/gym_microrts/envs/vec_env.py @@ -82,13 +82,15 @@ def __init__( from rts.units import UnitTypeTable self.real_utt = UnitTypeTable() - from ai.rewardfunction import (AttackRewardFunction, - ProduceBuildingRewardFunction, - ProduceCombatUnitRewardFunction, - ProduceWorkerRewardFunction, - ResourceGatherRewardFunction, - RewardFunctionInterface, - WinLossRewardFunction) + from 
ai.rewardfunction import ( + AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction, + ) self.rfs = JArray(RewardFunctionInterface)( [ @@ -272,13 +274,15 @@ def __init__( from rts.units import UnitTypeTable self.real_utt = UnitTypeTable() - from ai.rewardfunction import (AttackRewardFunction, - ProduceBuildingRewardFunction, - ProduceCombatUnitRewardFunction, - ProduceWorkerRewardFunction, - ResourceGatherRewardFunction, - RewardFunctionInterface, - WinLossRewardFunction) + from ai.rewardfunction import ( + AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction, + ) self.rfs = JArray(RewardFunctionInterface)( [ diff --git a/pyproject.toml b/pyproject.toml index c31cf1f1..c9ee721c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,4 +42,4 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.extras] spyder = ["spyder"] -cleanrl = ["cleanrl"] \ No newline at end of file +cleanrl = ["cleanrl"] From bcbe3beeef7784d808d419ae91385c19ad6f18ae Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 17:12:06 -0500 Subject: [PATCH 4/6] revert formatting --- tests/test_mask.py | 305 +++++++------------------------------- tests/test_observation.py | 170 ++++----------------- 2 files changed, 85 insertions(+), 390 deletions(-) diff --git a/tests/test_mask.py b/tests/test_mask.py index 0d74fef3..71c63007 100644 --- a/tests/test_mask.py +++ b/tests/test_mask.py @@ -24,261 +24,62 @@ def test_mask(): e.printStackTrace() len(envs.action_plane_space.nvec) + # fmt: off np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 1], - np.array( - [ - 1, - 1, - 1, - 0, - 1, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,1], + np.array([ + 1, 1, 1, 0, 1, 0, + 0, 1, 0, 0, + 0, 0, 0, 1, + 0, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 4], - np.array( - [ - 1, - 1, - 1, - 0, - 1, - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,4], + np.array([ + 1, 1, 1, 0, 1, 0, + 0, 0, 1, 0, + 1, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 5], - np.array( - [ - 1, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,5], + np.array([ + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 1, 1, 0, + 0, 0, 0, 1, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) + # fmt: on diff --git a/tests/test_observation.py b/tests/test_observation.py index 23baeee9..29c311ba 100644 --- a/tests/test_observation.py +++ b/tests/test_observation.py @@ -17,147 +17,41 @@ def test_observation(): reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) + # fmt: off next_obs = envs.reset() - resource = np.array( - [ - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # 1 hp - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, # >= 4 resources - 1.0, - 0.0, - 0.0, # no owner - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type resource - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) - p1_worker = np.array( - [ - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # 1 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 0.0, - 1.0, - 0.0, # player 1 owns it - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # unit type worker - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) - p1_base = np.array( - [ - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, # 1 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 0.0, - 1.0, - 0.0, # player 1 owns it - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type base - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) + resource = np.array([ + 0., 1., 0., 0., 0., # 1 hp + 0., 0., 0., 0., 1., # >= 4 resources + 1., 0., 0., # no owner + 0., 1., 0., 0., 0., 0., 0., 0., # unit type resource + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + p1_worker = np.array([ + 0., 1., 0., 0., 0., # 1 hp + 1., 0., 0., 0., 0., # 0 resources + 0., 1., 0., # player 1 owns it + 0., 0., 0., 0., 1., 0., 0., 0., # unit type worker + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + p1_base = np.array([ + 0., 0., 0., 0., 1., # 1 hp + 1., 0., 0., 0., 0., # 0 resources + 0., 1., 0., # player 1 owns it + 0., 0., 1., 0., 0., 0., 0., 0., # unit type base + 1., 0., 0., 0., 0., 0. 
# currently not executing actions + ]).astype(np.int32) p2_worker = p1_worker.copy() - p2_worker[10:13] = np.array( - [ - 0.0, - 0.0, - 1.0, - ] - ) # player 2 owns it + p2_worker[10:13] = np.array([0., 0., 1.,]) # player 2 owns it p2_base = p1_base.copy() - p2_base[10:13] = np.array( - [ - 0.0, - 0.0, - 1.0, - ] - ) # player 2 owns it - empty_cell = np.array( - [ - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 1.0, - 0.0, - 0.0, # no owner - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type empty cell - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) + p2_base[10:13] = np.array([0., 0., 1.,]) # player 2 owns it + empty_cell = np.array([ + 1., 0., 0., 0., 0., # 0 hp + 1., 0., 0., 0., 0., # 0 resources + 1., 0., 0., # no owner + 1., 0., 0., 0., 0., 0., 0., 0., # unit type empty cell + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + # fmt: on # player 1's perspective np.testing.assert_array_equal(next_obs[0][0][0], resource) From bdc2e4af8f0bae7073e6ddd12c4ba4c00a797dfd Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 27 Jan 2022 11:36:17 -0500 Subject: [PATCH 5/6] Add precommit CI --- .github/workflows/pre-commit.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/pre-commit.yml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..0e138fde --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,24 @@ +name: Pre-commit + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ main ] +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.9] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - uses: pre-commit/action@v2.0.3 From 2e2786d9f5ce6e4610099bb922c2171500113108 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 27 Jan 2022 11:39:00 -0500 Subject: [PATCH 6/6] change names --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/pypi.yml | 2 +- .github/workflows/tests.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 0e138fde..9176620a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: Pre-commit +name: pre-commit on: push: diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index a1b6cb16..4625d8e3 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,4 +1,4 @@ -name: build +name: build and distribute on: push: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0c923067..ce900919 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Tests +name: tests on: push:
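
A note on the `# fmt: off` / `# fmt: on` markers introduced in PATCH 4/6: Black leaves any region between those two comments untouched, so the hand-aligned one-hot rows in tests/test_mask.py and tests/test_observation.py can keep one semantic group per line while the rest of the repository stays auto-formatted. A minimal, self-contained sketch of the pattern (the values mirror the `empty_cell` vector from tests/test_observation.py; the trailing asserts are illustrative only and are not part of the patch series):

    import numpy as np

    # fmt: off
    empty_cell = np.array([
        1., 0., 0., 0., 0.,              # 0 hp
        1., 0., 0., 0., 0.,              # 0 resources
        1., 0., 0.,                      # no owner
        1., 0., 0., 0., 0., 0., 0., 0.,  # unit type: empty cell
        1., 0., 0., 0., 0., 0.,          # currently not executing actions
    ]).astype(np.int32)
    # fmt: on

    # 27 feature planes per cell, exactly one hot bit per group.
    assert empty_cell.shape == (27,)
    assert empty_cell.sum() == 5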
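
The hand-written masks reverted in PATCH 4/6 also document the per-cell action layout: the comment groups line up as action type (6), move direction (4), harvest direction (4), return direction (4), produce direction (4), produce unit type (7), and a 7 x 7 relative attack position grid (49), for 78 mask entries per cell, which matches the seven-component action slices such as [2, 0, 3, 0, 0, 0, 0] used in tests/test_reward.py. Below is a small sketch of how such a flat mask could be split back into those components; the component sizes are inferred from the tests above rather than taken from the gym_microrts API, so treat them as an assumption:

    import numpy as np

    # Component sizes inferred from the hand-written masks in tests/test_mask.py:
    # action type (6), move dir (4), harvest dir (4), return dir (4),
    # produce dir (4), produce unit type (7), relative attack position (7 * 7 = 49).
    NVEC = np.array([6, 4, 4, 4, 4, 7, 49])

    def split_cell_mask(flat_mask):
        """Split one cell's flat 78-entry mask into its 7 per-component segments."""
        flat_mask = np.asarray(flat_mask)
        assert flat_mask.shape == (NVEC.sum(),)
        # np.cumsum(NVEC)[:-1] gives the split points between consecutive components.
        return np.split(flat_mask, np.cumsum(NVEC)[:-1])

    # Example with an all-zero (fully masked) cell:
    segments = split_cell_mask(np.zeros(78, dtype=np.int32))
    assert [len(s) for s in segments] == [6, 4, 4, 4, 4, 7, 49]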