From 0e37a511db20dbb05095d92592acabd5a50806dc Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 13:41:56 -0500 Subject: [PATCH 1/6] Add pre-commit utilities --- .pre-commit-config.yaml | 30 +++++++++++ poetry.lock | 115 +++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..dcc8ad9e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +repos: + - repo: /~https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: + - --skip wandb + - repo: /~https://github.com/myint/autoflake + rev: v1.4 + hooks: + - id: autoflake + args: + - -r + - --exclude=wandb + - --in-place + - --remove-unused-variables + - --remove-all-unused-imports + - repo: /~https://github.com/python/black + rev: 21.7b0 + hooks: + - id: black + args: + - --line-length=127 + - --exclude=wandb + - repo: /~https://github.com/codespell-project/codespell + rev: v2.1.0 + hooks: + - id: codespell + args: + - --ignore-words-list=nd,reacher,thist,ths diff --git a/poetry.lock b/poetry.lock index 1fa60d98..f763b7bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -269,6 +269,14 @@ python-versions = "*" [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.3.1" +description = "Validate configuration and produce human readable error messages." +category = "dev" +optional = false +python-versions = ">=3.6.1" + [[package]] name = "chardet" version = "4.0.0" @@ -435,6 +443,14 @@ category = "main" optional = true python-versions = ">=2.7" +[[package]] +name = "distlib" +version = "0.3.4" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "docker-pycreds" version = "0.4.0" @@ -474,6 +490,18 @@ category = "main" optional = true python-versions = ">=2.7" +[[package]] +name = "filelock" +version = "3.4.2" +description = "A platform independent file lock." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"] +testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"] + [[package]] name = "flake8" version = "3.9.2" @@ -584,6 +612,17 @@ other = ["lz4 (>=3.1.0)", "opencv-python (>=3)"] robotics = ["mujoco_py (>=1.50,<2.0)"] toy_text = ["scipy (>=1.4.1)"] +[[package]] +name = "identify" +version = "2.4.5" +description = "File identification library for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.3" @@ -1054,6 +1093,14 @@ category = "main" optional = true python-versions = ">=3.5" +[[package]] +name = "nodeenv" +version = "1.6.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "numpy" version = "1.21.4" @@ -1227,7 +1274,7 @@ name = "platformdirs" version = "2.4.0" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
category = "main" -optional = true +optional = false python-versions = ">=3.6" [package.extras] @@ -1270,6 +1317,23 @@ category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "pre-commit" +version = "2.17.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + [[package]] name = "promise" version = "2.3" @@ -4082,6 +4146,25 @@ brotli = ["brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "virtualenv" +version = "20.13.0" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +distlib = ">=0.3.1,<1" +filelock = ">=3.2,<4" +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +platformdirs = ">=2,<3" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] + [[package]] name = "wandb" version = "0.12.6" @@ -4209,7 +4292,7 @@ spyder = ["spyder"] [metadata] lock-version = "1.1" python-versions = ">=3.7.1,<3.10" -content-hash = "39a9e11b61b9edd0e83ecaf014cb7e773a6970bb9cefe387862ef1fb1e087313" +content-hash = "d3e58d69b86b68980d6f1411987e6effbb4b051a5c94839997685794c1135dcb" [metadata.files] absl-py = [ @@ -4353,6 +4436,10 @@ cffi = [ {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"}, {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"}, ] +cfgv = [ + {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, + {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, +] chardet = [ {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, @@ -4446,6 +4533,10 @@ diff-match-patch = [ {file = "diff-match-patch-20200713.tar.gz", hash = "sha256:da6f5a01aa586df23dfc89f3827e1cafbb5420be9d87769eeb079ddfd9477a18"}, {file = "diff_match_patch-20200713-py3-none-any.whl", hash = "sha256:8bf9d9c4e059d917b5c6312bac0c137971a32815ddbda9c682b949f2986b4d34"}, ] +distlib = [ + {file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"}, + {file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"}, +] docker-pycreds = [ {file = "docker-pycreds-0.4.0.tar.gz", hash = "sha256:6ce3270bcaf404cc4c3e27e4b6c70d3521deae82fb508767870fdbf772d584d4"}, {file = 
"docker_pycreds-0.4.0-py2.py3-none-any.whl", hash = "sha256:7266112468627868005106ec19cd0d722702d2b7d5912a28e19b826c3d37af49"}, @@ -4463,6 +4554,10 @@ entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, ] +filelock = [ + {file = "filelock-3.4.2-py3-none-any.whl", hash = "sha256:cf0fc6a2f8d26bd900f19bf33915ca70ba4dd8c56903eeb14e1e7a2fd7590146"}, + {file = "filelock-3.4.2.tar.gz", hash = "sha256:38b4f4c989f9d06d44524df1b24bd19e167d851f19b50bf3e3559952dddc5b80"}, +] flake8 = [ {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"}, {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, @@ -4532,6 +4627,10 @@ grpcio = [ gym = [ {file = "gym-0.21.0.tar.gz", hash = "sha256:0fd1ce165c754b4017e37a617b097c032b8c3feb8a0394ccc8777c7c50dddff3"}, ] +identify = [ + {file = "identify-2.4.5-py2.py3-none-any.whl", hash = "sha256:d27d10099844741c277b45d809bd452db0d70a9b41ea3cd93799ebbbcc6dcb29"}, + {file = "identify-2.4.5.tar.gz", hash = "sha256:d11469ff952a4d7fd7f9be520d335dc450f585d474b39b5dfb86a500831ab6c7"}, +] idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, @@ -4801,6 +4900,10 @@ nest-asyncio = [ {file = "nest_asyncio-1.5.1-py3-none-any.whl", hash = "sha256:76d6e972265063fe92a90b9cc4fb82616e07d586b346ed9d2c89a4187acea39c"}, {file = "nest_asyncio-1.5.1.tar.gz", hash = "sha256:afc5a1c515210a23c461932765691ad39e8eba6551c055ac8d5546e69250d0aa"}, ] +nodeenv = [ + {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, + {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, +] numpy = [ {file = "numpy-1.21.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8890b3360f345e8360133bc078d2dacc2843b6ee6059b568781b15b97acbe39f"}, {file = "numpy-1.21.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:69077388c5a4b997442b843dbdc3a85b420fb693ec8e33020bb24d647c164fa5"}, @@ -4989,6 +5092,10 @@ poyo = [ {file = "poyo-0.5.0-py2.py3-none-any.whl", hash = "sha256:3e2ca8e33fdc3c411cd101ca395668395dd5dc7ac775b8e809e3def9f9fe041a"}, {file = "poyo-0.5.0.tar.gz", hash = "sha256:e26956aa780c45f011ca9886f044590e2d8fd8b61db7b1c1cf4e0869f48ed4dd"}, ] +pre-commit = [ + {file = "pre_commit-2.17.0-py2.py3-none-any.whl", hash = "sha256:725fa7459782d7bec5ead072810e47351de01709be838c2ce1726b9591dad616"}, + {file = "pre_commit-2.17.0.tar.gz", hash = "sha256:c1a8040ff15ad3d648c70cc3e55b93e4d2d5b687320955505587fd79bbaed06a"}, +] promise = [ {file = "promise-2.3.tar.gz", hash = "sha256:dfd18337c523ba4b6a58801c164c1904a9d4d1b1747c7d5dbf45b693a49d93d0"}, ] @@ -6347,6 +6454,10 @@ urllib3 = [ {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, ] +virtualenv = [ + {file = "virtualenv-20.13.0-py2.py3-none-any.whl", hash = 
"sha256:339f16c4a86b44240ba7223d0f93a7887c3ca04b5f9c8129da7958447d079b09"}, + {file = "virtualenv-20.13.0.tar.gz", hash = "sha256:d8458cf8d59d0ea495ad9b34c2599487f8a7772d796f9910858376d1600dd2dd"}, +] wandb = [ {file = "wandb-0.12.6-py2.py3-none-any.whl", hash = "sha256:a486a697d18ca82e1cde64aa60997a9a37c71af3c6946240bda81a4d61f2bcf4"}, {file = "wandb-0.12.6.tar.gz", hash = "sha256:ad946efc269b25a36b500a831b6bf9ae26b4a695add55e4a53f5b7220e03b177"}, diff --git a/pyproject.toml b/pyproject.toml index c5fd79fe..c31cf1f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ trueskill = "^0.4.5" stable-baselines3 = "^1.1.0" torch = "1.7.1" pytest = "^6.2.5" +pre-commit = "^2.17.0" [tool.poetry-dynamic-versioning] enable = true From e794ac91c785cbe1828ff82b8b3c7d6bb0a30545 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 13:48:34 -0500 Subject: [PATCH 2/6] isort autoflake --- experiments/league.py | 181 +++++++++---------- experiments/ppo_gridnet.py | 40 ++-- experiments/ppo_gridnet_eval.py | 16 +- gym_microrts/envs/vec_env.py | 226 +++++++++++++---------- gym_microrts/microrts_ai.py | 40 +++- gym_microrts/microrts_maps.py | 2 +- hello_world.py | 45 +++-- tests/test_e2e.py | 5 +- tests/test_mask.py | 311 ++++++++++++++++++++++++++------ tests/test_observation.py | 173 ++++++++++++++---- tests/test_reward.py | 50 ++--- 11 files changed, 745 insertions(+), 344 deletions(-) diff --git a/experiments/league.py b/experiments/league.py index 439a4cf5..bb708928 100644 --- a/experiments/league.py +++ b/experiments/league.py @@ -1,38 +1,27 @@ # http://proceedings.mlr.press/v97/han19a/han19a.pdf import argparse +import datetime +import itertools import os import random -import time +import shutil from distutils.util import strtobool +from enum import Enum import numpy as np -import pickle import pandas as pd import torch -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv, MicroRTSBotVecEnv -from gym_microrts import microrts_ai # fmt: off -from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder -from torch.utils.tensorboard import SummaryWriter -from trueskill import TrueSkill, Rating, rate_1vs1, quality_1vs1 +from peewee import (JOIN, CharField, DateTimeField, FloatField, + ForeignKeyField, Model, SmallIntegerField, SqliteDatabase, + fn) from ppo_gridnet import Agent, MicroRTSStatsRecorder -import itertools -from peewee import ( - Model, - SqliteDatabase, - CharField, - ForeignKeyField, - TextField, - DateTimeField, - BooleanField, - FloatField, - SmallIntegerField, - JOIN, - fn, -) -import datetime -from enum import Enum -import shutil +from stable_baselines3.common.vec_env import VecMonitor +from trueskill import Rating, quality_1vs1, rate_1vs1 + +from gym_microrts import microrts_ai # fmt: off +from gym_microrts.envs.vec_env import MicroRTSBotVecEnv, MicroRTSGridModeVecEnv + def parse_args(): # fmt: off @@ -64,33 +53,40 @@ def parse_args(): # fmt: on return args + args = parse_args() dbname = "league" -if(args.partial_obs): - dbname = 'po_league' +if args.partial_obs: + dbname = "po_league" dbpath = f"gym-microrts-static-files/{dbname}.db" csvpath = f"gym-microrts-static-files/{dbname}.csv" db = SqliteDatabase(dbpath) + + class BaseModel(Model): class Meta: database = db + class AI(BaseModel): name = CharField(unique=True) mu = FloatField() sigma = FloatField() ai_type = CharField() + def __str__(self): return f"🤖 {self.name} with N({round(self.mu, 3)}, {round(self.sigma, 3)})" + class MatchHistory(BaseModel): - challenger = ForeignKeyField(AI, 
backref='challenger_match_histories') - defender = ForeignKeyField(AI, backref='defender_match_histories') + challenger = ForeignKeyField(AI, backref="challenger_match_histories") + defender = ForeignKeyField(AI, backref="defender_match_histories") win = SmallIntegerField() draw = SmallIntegerField() loss = SmallIntegerField() created_date = DateTimeField(default=datetime.datetime.now) + db.connect() db.create_tables([AI, MatchHistory]) @@ -100,17 +96,18 @@ class Outcome(Enum): DRAW = 0 LOSS = -1 + class Match: def __init__(self, partial_obs: bool, match_up=None): # mode 0: rl-ai vs built-in-ai # mode 1: rl-ai vs rl-ai # mode 2: built-in-ai vs built-in-ai - built_in_ais=None - built_in_ais2=None - rl_ai=None - rl_ai2=None - + built_in_ais = None + built_in_ais2 = None + rl_ai = None + rl_ai2 = None + # determine mode rl_ais = [] built_in_ais = [] @@ -123,23 +120,23 @@ def __init__(self, partial_obs: bool, match_up=None): mode = 0 p0 = rl_ais[0] p1 = built_in_ais[0] - rl_ai=p0 - built_in_ais=[eval(f"microrts_ai.{p1}")] + rl_ai = p0 + built_in_ais = [eval(f"microrts_ai.{p1}")] elif len(rl_ais) == 2: mode = 1 p0 = rl_ais[0] p1 = rl_ais[1] - rl_ai=p0 - rl_ai2=p1 + rl_ai = p0 + rl_ai2 = p1 else: mode = 2 p0 = built_in_ais[0] p1 = built_in_ais[1] - built_in_ais=[eval(f"microrts_ai.{p0}")] - built_in_ais2=[eval(f"microrts_ai.{p1}")] - + built_in_ais = [eval(f"microrts_ai.{p0}")] + built_in_ais2 = [eval(f"microrts_ai.{p1}")] + self.p0, self.p1 = p0, p1 - + self.mode = mode self.partial_obs = partial_obs self.built_in_ais = built_in_ais @@ -185,7 +182,7 @@ def __init__(self, partial_obs: bool, match_up=None): max_steps=max_steps, render_theme=2, map_paths=["maps/16x16/basesWorkers16x16.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) self.envs = MicroRTSStatsRecorder(self.envs) self.envs = VecMonitor(self.envs) @@ -197,10 +194,10 @@ def run(self, num_matches=7): return self.run_m1(num_matches) else: return self.run_m2(num_matches) - + def run_m0(self, num_matches): results = [] - mapsize = 16 * 16 + 16 * 16 next_obs = torch.Tensor(self.envs.reset()).to(self.device) while True: # self.envs.render() @@ -216,7 +213,7 @@ def run_m0(self, num_matches): except Exception as e: e.printStackTrace() raise - + for idx, info in enumerate(infos): if "episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] @@ -225,19 +222,19 @@ def run_m0(self, num_matches): def run_m1(self, num_matches): results = [] - mapsize = 16 * 16 + 16 * 16 next_obs = torch.Tensor(self.envs.reset()).to(self.device) while True: # self.envs.render() # ALGO LOGIC: put action logic here with torch.no_grad(): mask = torch.tensor(np.array(self.envs.get_action_mask())).to(self.device) - + p1_obs = next_obs[::2] p2_obs = next_obs[1::2] p1_mask = mask[::2] p2_mask = mask[1::2] - + p1_action, _, _, _, _ = self.agent.get_action_and_value( p1_obs, envs=self.envs, invalid_action_masks=p1_mask, device=self.device ) @@ -254,7 +251,7 @@ def run_m1(self, num_matches): except Exception as e: e.printStackTrace() raise - + for idx, info in enumerate(infos): if "episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] @@ -268,28 +265,34 @@ def run_m2(self, num_matches): # self.envs.render() # dummy actions next_obs, reward, done, infos = self.envs.step( - [[[0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0],]]) + [ + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ] + ] + ) for idx, info in enumerate(infos): if 
"episode" in info.keys(): results += [info["microrts_stats"]["WinLossRewardFunction"]] if len(results) >= num_matches: return results + def get_ai_type(ai_name): if ai_name[-3:] == ".pt": - return 'rl_ai' + return "rl_ai" else: - return 'built_in_ai' + return "built_in_ai" def get_match_history(ai_name): - query = (MatchHistory - .select( + query = ( + MatchHistory.select( AI.name, - fn.SUM(MatchHistory.win).alias('wins'), - fn.SUM(MatchHistory.draw).alias('draws'), - fn.SUM(MatchHistory.loss).alias('losss'), + fn.SUM(MatchHistory.win).alias("wins"), + fn.SUM(MatchHistory.draw).alias("draws"), + fn.SUM(MatchHistory.loss).alias("losss"), ) .join(AI, JOIN.LEFT_OUTER, on=MatchHistory.defender) .group_by(MatchHistory.defender) @@ -297,50 +300,48 @@ def get_match_history(ai_name): ) return pd.DataFrame(list(query.dicts())) + def get_leaderboard(): - query = (AI.select( - AI.name, - AI.mu, - AI.sigma, - (AI.mu - 3 * AI.sigma).alias('trueskill'), - ) - .order_by((AI.mu - 3 * AI.sigma).desc()) - ) + query = AI.select( + AI.name, + AI.mu, + AI.sigma, + (AI.mu - 3 * AI.sigma).alias("trueskill"), + ).order_by((AI.mu - 3 * AI.sigma).desc()) return pd.DataFrame(list(query.dicts())) + def get_leaderboard_existing_ais(existing_ai_names): - query = (AI.select( + query = ( + AI.select( AI.name, AI.mu, AI.sigma, - (AI.mu - 3 * AI.sigma).alias('trueskill'), + (AI.mu - 3 * AI.sigma).alias("trueskill"), ) .where((AI.name.in_(existing_ai_names))) .order_by((AI.mu - 3 * AI.sigma).desc()) ) return pd.DataFrame(list(query.dicts())) + if __name__ == "__main__": existing_ai_names = [item.name for item in AI.select()] all_ai_names = set(existing_ai_names + args.evals) if not args.update_db: shutil.copyfile(dbpath, f"{dbpath}.backup") - for ai_name in all_ai_names: + for ai_name in all_ai_names: ai = AI.get_or_none(name=ai_name) if ai is None: - ai = AI( - name=ai_name, - mu=25.0, - sigma=8.333333333333334, - ai_type=get_ai_type(ai_name)) + ai = AI(name=ai_name, mu=25.0, sigma=8.333333333333334, ai_type=get_ai_type(ai_name)) ai.save() # case 1: initialize the league with round robin if len(existing_ai_names) == 0: match_ups = list(itertools.combinations(all_ai_names, 2)) np.random.shuffle(match_ups) - for idx in range(2): # switch player 1 and 2's starting locations + for idx in range(2): # switch player 1 and 2's starting locations for match_up in match_ups: if idx == 0: match_up = list(reversed(match_up)) @@ -348,7 +349,7 @@ def get_leaderboard_existing_ais(existing_ai_names): m = Match(args.partial_obs, match_up) challenger = AI.get_or_none(name=m.p0) defender = AI.get_or_none(name=m.p1) - + r = m.run(args.num_matches // 2) for item in r: drawn = False @@ -360,19 +361,18 @@ def get_leaderboard_existing_ais(existing_ai_names): else: winner = defender loser = challenger - + print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}") - + winner_rating, loser_rating = rate_1vs1( - Rating(winner.mu, winner.sigma), - Rating(loser.mu, loser.sigma), - drawn=drawn) + Rating(winner.mu, winner.sigma), Rating(loser.mu, loser.sigma), drawn=drawn + ) winner.mu, winner.sigma = winner_rating.mu, winner_rating.sigma loser.mu, loser.sigma = loser_rating.mu, loser_rating.sigma winner.save() loser.save() - + MatchHistory( challenger=challenger, defender=defender, @@ -397,7 +397,7 @@ def get_leaderboard_existing_ais(existing_ai_names): if ai.name == opponent_ai.name: continue match_qualities += [[opponent_ai, quality_1vs1(ai, opponent_ai)]] - + # sort by quality match_qualities = sorted(match_qualities, key=lambda x: x[1], 
reverse=True) print("match_qualities[:3]", match_qualities[:3]) @@ -408,8 +408,8 @@ def get_leaderboard_existing_ais(existing_ai_names): match_up = (ai.name, opponent_ai.name) match_quality = quality_1vs1(ai, opponent_ai) print(f"the match up is ({ai}, {opponent_ai}), quality is {round(match_quality, 4)}") - winner = ai # dummy setting - for idx in range(2): # switch player 1 and 2's starting locations + winner = ai # dummy setting + for idx in range(2): # switch player 1 and 2's starting locations if idx == 0: match_up = list(reversed(match_up)) m = Match(args.partial_obs, match_up) @@ -430,10 +430,9 @@ def get_leaderboard_existing_ais(existing_ai_names): loser = challenger print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}") winner_rating, loser_rating = rate_1vs1( - Rating(winner.mu, winner.sigma), - Rating(loser.mu, loser.sigma), - drawn=drawn) - + Rating(winner.mu, winner.sigma), Rating(loser.mu, loser.sigma), drawn=drawn + ) + # freeze existing AIs ratings if winner.name == ai.name: ai.mu, ai.sigma = winner_rating.mu, winner_rating.sigma @@ -448,9 +447,9 @@ def get_leaderboard_existing_ais(existing_ai_names): draw=int(item == 0), loss=int(item == -1), ).save() - + get_leaderboard().to_csv(f"{dbname}.temp.csv", index=False) - + print("=======================") print(get_leaderboard()) if not args.update_db: diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py index 95a22c3d..4754c4b3 100644 --- a/experiments/ppo_gridnet.py +++ b/experiments/ppo_gridnet.py @@ -3,8 +3,8 @@ import argparse import os import random -import time import subprocess +import time from distutils.util import strtobool import numpy as np @@ -13,12 +13,14 @@ import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from gym_microrts import microrts_ai -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv -from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder +from stable_baselines3.common.vec_env import (VecEnvWrapper, VecMonitor, + VecVideoRecorder) from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter +from gym_microrts import microrts_ai +from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv + def parse_args(): # fmt: off @@ -84,7 +86,7 @@ def parse_args(): parser.add_argument('--anneal-lr', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True, help="Toggle learning rate annealing for policy and value networks") parser.add_argument('--clip-vloss', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True, - help='Toggles wheter or not to use a clipped loss for the value function, as per the paper.') + help='Toggles whether or not to use a clipped loss for the value function, as per the paper.') parser.add_argument('--num-models', type=int, default=200, help='the number of models saved') @@ -153,7 +155,6 @@ def layer_init(layer, std=np.sqrt(2), bias_const=0.0): return layer - class Agent(nn.Module): def __init__(self, envs, mapsize=16 * 16): super(Agent, self).__init__() @@ -190,7 +191,7 @@ def __init__(self, envs, mapsize=16 * 16): nn.ReLU(), layer_init(nn.Linear(128, 1), std=1), ) - self.register_buffer('mask_value', torch.tensor(-1e8)) + self.register_buffer("mask_value", torch.tensor(-1e8)) def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None): hidden = self.encoder(x) @@ -227,7 +228,6 @@ def get_value(self, x): return self.critic(self.encoder(x)) - if __name__ == "__main__": 
args = parse_args() @@ -307,7 +307,6 @@ def get_value(self, x): ## CRASH AND RESUME LOGIC: starting_update = 1 - from jpype.types import JArray, JInt if args.prod_mode and wandb.run.resumed: starting_update = run.summary.get("charts/update") + 1 @@ -410,7 +409,7 @@ def get_value(self, x): b_values = values.reshape(-1) b_invalid_action_masks = invalid_action_masks.reshape((-1,) + invalid_action_shape) - # Optimizaing the policy and value network + # Optimizing the policy and value network inds = np.arange( args.batch_size, ) @@ -458,13 +457,24 @@ def get_value(self, x): ## CRASH AND RESUME LOGIC: if args.prod_mode: - if (update-1) % args.save_frequency == 0: + if (update - 1) % args.save_frequency == 0: if not os.path.exists(f"models/{experiment_name}"): os.makedirs(f"models/{experiment_name}") torch.save(agent.state_dict(), f"models/{experiment_name}/agent.pt") torch.save(agent.state_dict(), f"models/{experiment_name}/{global_step}.pt") wandb.save(f"models/{experiment_name}/agent.pt", base_path=f"models/{experiment_name}", policy="now") - subprocess.Popen(["python", "league.py", "--evals", f"models/{experiment_name}/{global_step}.pt", "--update-db", "false", "--cuda", "false"]) + subprocess.Popen( + [ + "python", + "league.py", + "--evals", + f"models/{experiment_name}/{global_step}.pt", + "--update-db", + "false", + "--cuda", + "false", + ] + ) eval_queue += [f"models/{experiment_name}/{global_step}.pt"] print(f"Evaluating models/{experiment_name}/{global_step}.pt") @@ -484,8 +494,8 @@ def get_value(self, x): trueskill_data = { "name": league.loc[model_path].name, "mu": league.loc[model_path]["mu"], - "sigma":league.loc[model_path]["sigma"], - "trueskill": league.loc[model_path]["trueskill"] + "sigma": league.loc[model_path]["sigma"], + "trueskill": league.loc[model_path]["trueskill"], } trueskill_df = trueskill_df.append(trueskill_data, ignore_index=True) wandb.log({"trueskill": wandb.Table(dataframe=trueskill_df)}) @@ -494,7 +504,7 @@ def get_value(self, x): trueskill_step_df = trueskill_step_df.append(trueskill_data, ignore_index=True) preset_trueskill_step_df_clone = preset_trueskill_step_df.copy() preset_trueskill_step_df_clone["step"] = model_global_step - trueskill_step_df = trueskill_step_df.append(preset_trueskill_step_df_clone, ignore_index=True) + trueskill_step_df = trueskill_step_df.append(preset_trueskill_step_df_clone, ignore_index=True) wandb.log({"trueskill_step": wandb.Table(dataframe=trueskill_step_df)}) # TRY NOT TO MODIFY: record rewards for plotting purposes diff --git a/experiments/ppo_gridnet_eval.py b/experiments/ppo_gridnet_eval.py index 8ef98265..392e2890 100644 --- a/experiments/ppo_gridnet_eval.py +++ b/experiments/ppo_gridnet_eval.py @@ -8,15 +8,14 @@ import numpy as np import torch -import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from gym_microrts import microrts_ai -from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv -from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder -from torch.distributions.categorical import Categorical -from torch.utils.tensorboard import SummaryWriter from ppo_gridnet import Agent, MicroRTSStatsRecorder +from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder +from torch.utils.tensorboard import SummaryWriter + +from gym_microrts import microrts_ai # noqa +from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv def parse_args(): @@ -101,7 +100,6 @@ def parse_args(): torch.manual_seed(args.seed) 
torch.backends.cudnn.deterministic = args.torch_deterministic - ais = [] if args.ai: ais = [eval(f"microrts_ai.{args.ai}")] @@ -163,7 +161,7 @@ def parse_args(): # ALGO LOGIC: put action logic here with torch.no_grad(): invalid_action_masks[step] = torch.tensor(np.array(envs.get_action_mask())).to(device) - + if args.ai: action, logproba, _, _, vs = agent.get_action_and_value( next_obs, envs=envs, invalid_action_masks=invalid_action_masks[step], device=device @@ -173,7 +171,7 @@ def parse_args(): p2_obs = next_obs[1::2] p1_mask = invalid_action_masks[step][::2] p2_mask = invalid_action_masks[step][1::2] - + p1_action, _, _, _, _ = agent.get_action_and_value( p1_obs, envs=envs, invalid_action_masks=p1_mask, device=device ) diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py index 2b183fe7..2a509ae2 100644 --- a/gym_microrts/envs/vec_env.py +++ b/gym_microrts/envs/vec_env.py @@ -1,23 +1,20 @@ - -import os import json +import os import xml.etree.ElementTree as ET -import numpy as np -from PIL import Image import gym -import gym_microrts - import jpype -from jpype.imports import registerDomain import jpype.imports +import numpy as np +from jpype.imports import registerDomain from jpype.types import JArray, JInt +from PIL import Image + +import gym_microrts + class MicroRTSGridModeVecEnv: - metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second' : 150 - } + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 150} """ [[0]x_coordinate*y_coordinate(x*y), [1]a_t(6), [2]p_move(4), [3]p_harvest(4), [4]p_return(4), [5]p_produce_direction(4), [6]p_produce_unit_type(z), @@ -26,7 +23,8 @@ class MicroRTSGridModeVecEnv: :param env: gym3 environment to adapt """ - def __init__(self, + def __init__( + self, num_selfplay_envs, num_bot_envs, partial_obs=False, @@ -35,7 +33,8 @@ def __init__(self, frame_skip=0, ai2s=[], map_paths=["maps/10x10/basesTwoWorkers10x10.xml"], - reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0])): + reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]), + ): self.num_selfplay_envs = num_selfplay_envs self.num_bot_envs = num_bot_envs @@ -50,11 +49,13 @@ def __init__(self, if len(map_paths) == 1: self.map_paths = [map_paths[0] for _ in range(self.num_envs)] else: - assert len(map_paths) == self.num_envs, "if multiple maps are provided, they should be provided for each environment" + assert ( + len(map_paths) == self.num_envs + ), "if multiple maps are provided, they should be provided for each environment" self.reward_weight = reward_weight # read map - self.microrts_path = os.path.join(gym_microrts.__path__[0], 'microrts') + self.microrts_path = os.path.join(gym_microrts.__path__[0], "microrts") root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot() self.height, self.width = int(root.get("height")), int(root.get("width")) @@ -63,9 +64,15 @@ def __init__(self, registerDomain("ts", alias="tests") registerDomain("ai") jars = [ - "microrts.jar", "lib/bots/Coac.jar", "lib/bots/Droplet.jar", "lib/bots/GRojoA3N.jar", - "lib/bots/Izanagi.jar", "lib/bots/MixedBot.jar", "lib/bots/TiamatBot.jar", "lib/bots/UMSBot.jar", - "lib/bots/mayariBot.jar" # "MindSeal.jar" + "microrts.jar", + "lib/bots/Coac.jar", + "lib/bots/Droplet.jar", + "lib/bots/GRojoA3N.jar", + "lib/bots/Izanagi.jar", + "lib/bots/MixedBot.jar", + "lib/bots/TiamatBot.jar", + "lib/bots/UMSBot.jar", + "lib/bots/mayariBot.jar", # "MindSeal.jar" ] for jar in jars: jpype.addClassPath(os.path.join(self.microrts_path, jar)) @@ -73,47 
+80,56 @@ def __init__(self, # start microrts client from rts.units import UnitTypeTable + self.real_utt = UnitTypeTable() - from ai.rewardfunction import RewardFunctionInterface, WinLossRewardFunction, ResourceGatherRewardFunction, AttackRewardFunction, ProduceWorkerRewardFunction, ProduceBuildingRewardFunction, ProduceCombatUnitRewardFunction, CloserToEnemyBaseRewardFunction - self.rfs = JArray(RewardFunctionInterface)([ - WinLossRewardFunction(), - ResourceGatherRewardFunction(), - ProduceWorkerRewardFunction(), - ProduceBuildingRewardFunction(), - AttackRewardFunction(), - ProduceCombatUnitRewardFunction(), - # CloserToEnemyBaseRewardFunction(), - ]) + from ai.rewardfunction import (AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction) + + self.rfs = JArray(RewardFunctionInterface)( + [ + WinLossRewardFunction(), + ResourceGatherRewardFunction(), + ProduceWorkerRewardFunction(), + ProduceBuildingRewardFunction(), + AttackRewardFunction(), + ProduceCombatUnitRewardFunction(), + # CloserToEnemyBaseRewardFunction(), + ] + ) self.start_client() # computed properties - # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), + # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), # num_planes_unit_type(z), num_planes_unit_action(6)] - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6] if partial_obs: - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6, 2] - self.observation_space = gym.spaces.Box(low=0.0, - high=1.0, - shape=(self.height, self.width, - sum(self.num_planes)), - dtype=np.int32) + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] + self.observation_space = gym.spaces.Box( + low=0.0, high=1.0, shape=(self.height, self.width, sum(self.num_planes)), dtype=np.int32 + ) self.num_planes_len = len(self.num_planes) self.num_planes_prefix_sum = [0] for num_plane in self.num_planes: self.num_planes_prefix_sum.append(self.num_planes_prefix_sum[-1] + num_plane) - self.action_space_dims = [6, 4, 4, 4, 4, len(self.utt['unitTypes']), 7 * 7] + self.action_space_dims = [6, 4, 4, 4, 4, len(self.utt["unitTypes"]), 7 * 7] self.action_space = gym.spaces.MultiDiscrete(np.array([self.action_space_dims] * self.height * self.width).flatten()) self.action_plane_space = gym.spaces.MultiDiscrete(self.action_space_dims) - self.source_unit_idxs = np.tile(np.arange(self.height*self.width), (self.num_envs,1)) + self.source_unit_idxs = np.tile(np.arange(self.height * self.width), (self.num_envs, 1)) self.source_unit_idxs = self.source_unit_idxs.reshape((self.source_unit_idxs.shape + (1,))) - + def start_client(self): - from ts import JNIGridnetVecClient as Client from ai.core import AI + from ts import JNIGridnetVecClient as Client + self.vec_client = Client( self.num_selfplay_envs, self.num_bot_envs, @@ -125,34 +141,36 @@ def start_client(self): self.real_utt, self.partial_obs, ) - self.render_client = self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + self.render_client = ( + self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + ) # get the unit type table self.utt = json.loads(str(self.render_client.sendUTT())) def reset(self): - responses = self.vec_client.reset([0]*self.num_envs) + responses = 
self.vec_client.reset([0] * self.num_envs) obs = [self._encode_obs(np.array(ro)) for ro in responses.observation] return np.array(obs) def _encode_obs(self, obs): - obs = obs.reshape(len(obs), -1).clip(0, np.array([self.num_planes]).T-1) + obs = obs.reshape(len(obs), -1).clip(0, np.array([self.num_planes]).T - 1) obs_planes = np.zeros((self.height * self.width, self.num_planes_prefix_sum[-1]), dtype=np.int32) obs_planes_idx = np.arange(len(obs_planes)) - obs_planes[obs_planes_idx,obs[0]] = 1 + obs_planes[obs_planes_idx, obs[0]] = 1 for i in range(1, self.num_planes_len): - obs_planes[obs_planes_idx,obs[i]+self.num_planes_prefix_sum[i]] = 1 + obs_planes[obs_planes_idx, obs[i] + self.num_planes_prefix_sum[i]] = 1 return obs_planes.reshape(self.height, self.width, -1) def step_async(self, actions): - actions = actions.reshape((self.num_envs, self.width*self.height, -1)) - actions = np.concatenate((self.source_unit_idxs, actions), 2) # specify source unit - actions = actions[np.where(self.source_unit_mask==1)] # valid actions + actions = actions.reshape((self.num_envs, self.width * self.height, -1)) + actions = np.concatenate((self.source_unit_idxs, actions), 2) # specify source unit + actions = actions[np.where(self.source_unit_mask == 1)] # valid actions action_counts_per_env = self.source_unit_mask.sum(1) - java_actions = [None]*len(action_counts_per_env) + java_actions = [None] * len(action_counts_per_env) action_idx = 0 for outer_idx, action_count in enumerate(action_counts_per_env): - java_valid_action = [None]*action_count + java_valid_action = [None] * action_count for idx in range(action_count): java_valid_action[idx] = JArray(JInt)(actions[action_idx]) action_idx += 1 @@ -160,11 +178,11 @@ def step_async(self, actions): self.actions = JArray(JArray(JArray(JInt)))(java_actions) def step_wait(self): - responses = self.vec_client.gameStep(self.actions, [0]*self.num_envs) + responses = self.vec_client.gameStep(self.actions, [0] * self.num_envs) reward, done = np.array(responses.reward), np.array(responses.done) obs = [self._encode_obs(np.array(ro)) for ro in responses.observation] infos = [{"raw_rewards": item} for item in reward] - return np.array(obs), reward @ self.reward_weight, done[:,0], infos + return np.array(obs), reward @ self.reward_weight, done[:, 0], infos def step(self, ac): self.step_async(ac) @@ -184,10 +202,10 @@ def getattr_depth_check(self, name, already_found): def render(self, mode="human"): if mode == "human": self.render_client.render(False) - elif mode == 'rgb_array': + elif mode == "rgb_array": bytes_array = np.array(self.render_client.render(True)) image = Image.frombytes("RGB", (640, 640), bytes_array) - return np.array(image)[:,:,::-1] + return np.array(image)[:, :, ::-1] def close(self): if jpype._jpype.isStarted(): @@ -196,25 +214,24 @@ def close(self): def get_action_mask(self): action_mask = np.array(self.vec_client.getMasks(0)) - self.source_unit_mask = action_mask[:,:,:,0].reshape(self.num_envs, -1) - action_type_and_parameter_mask = action_mask[:,:,:,1:].reshape(self.num_envs, self.height*self.width, -1) + self.source_unit_mask = action_mask[:, :, :, 0].reshape(self.num_envs, -1) + action_type_and_parameter_mask = action_mask[:, :, :, 1:].reshape(self.num_envs, self.height * self.width, -1) return action_type_and_parameter_mask class MicroRTSBotVecEnv(MicroRTSGridModeVecEnv): - metadata = { - 'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second' : 150 - } + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 150} 
- def __init__(self, + def __init__( + self, ai1s=[], ai2s=[], partial_obs=False, max_steps=2000, render_theme=2, map_paths="maps/10x10/basesTwoWorkers10x10.xml", - reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0])): + reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]), + ): self.ai1s = ai1s self.ai2s = ai2s @@ -227,7 +244,7 @@ def __init__(self, self.reward_weight = reward_weight # read map - self.microrts_path = os.path.join(gym_microrts.__path__[0], 'microrts') + self.microrts_path = os.path.join(gym_microrts.__path__[0], "microrts") root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot() self.height, self.width = int(root.get("height")), int(root.get("width")) @@ -237,9 +254,15 @@ def __init__(self, registerDomain("ai") registerDomain("rts") jars = [ - "microrts.jar", "lib/bots/Coac.jar", "lib/bots/Droplet.jar", "lib/bots/GRojoA3N.jar", - "lib/bots/Izanagi.jar", "lib/bots/MixedBot.jar", "lib/bots/TiamatBot.jar", "lib/bots/UMSBot.jar", - "lib/bots/mayariBot.jar" # "MindSeal.jar" + "microrts.jar", + "lib/bots/Coac.jar", + "lib/bots/Droplet.jar", + "lib/bots/GRojoA3N.jar", + "lib/bots/Izanagi.jar", + "lib/bots/MixedBot.jar", + "lib/bots/TiamatBot.jar", + "lib/bots/UMSBot.jar", + "lib/bots/mayariBot.jar", # "MindSeal.jar" ] for jar in jars: jpype.addClassPath(os.path.join(self.microrts_path, jar)) @@ -247,33 +270,44 @@ def __init__(self, # start microrts client from rts.units import UnitTypeTable + self.real_utt = UnitTypeTable() - from ai.rewardfunction import RewardFunctionInterface, WinLossRewardFunction, ResourceGatherRewardFunction, AttackRewardFunction, ProduceWorkerRewardFunction, ProduceBuildingRewardFunction, ProduceCombatUnitRewardFunction, CloserToEnemyBaseRewardFunction - self.rfs = JArray(RewardFunctionInterface)([ - WinLossRewardFunction(), - ResourceGatherRewardFunction(), - ProduceWorkerRewardFunction(), - ProduceBuildingRewardFunction(), - AttackRewardFunction(), - ProduceCombatUnitRewardFunction(), - # CloserToEnemyBaseRewardFunction(), - ]) + from ai.rewardfunction import (AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction) + + self.rfs = JArray(RewardFunctionInterface)( + [ + WinLossRewardFunction(), + ResourceGatherRewardFunction(), + ProduceWorkerRewardFunction(), + ProduceBuildingRewardFunction(), + AttackRewardFunction(), + ProduceCombatUnitRewardFunction(), + # CloserToEnemyBaseRewardFunction(), + ] + ) self.start_client() # computed properties - # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), + # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5), # num_planes_unit_type(z), num_planes_unit_action(6)] - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6] if partial_obs: - self.num_planes = [5, 5, 3, len(self.utt['unitTypes'])+1, 6, 2] + self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] self.observation_space = gym.spaces.Discrete(2) self.action_space = gym.spaces.Discrete(2) def start_client(self): - from ts import JNIGridnetVecClient as Client from ai.core import AI + from ts import JNIGridnetVecClient as Client + self.vec_client = Client( self.max_steps, self.rfs, @@ -290,7 +324,7 @@ def start_client(self): def reset(self): responses = self.vec_client.reset([0 for _ in range(self.num_envs)]) - raw_obs, reward, done, info = np.ones((self.num_envs,2)), 
np.array(responses.reward), np.array(responses.done), {} + raw_obs, reward, done, info = np.ones((self.num_envs, 2)), np.array(responses.reward), np.array(responses.done), {} return raw_obs def step_async(self, actions): @@ -298,9 +332,9 @@ def step_async(self, actions): def step_wait(self): responses = self.vec_client.gameStep(self.actions, [0 for _ in range(self.num_envs)]) - raw_obs, reward, done = np.ones((self.num_envs,2)), np.array(responses.reward), np.array(responses.done) + raw_obs, reward, done = np.ones((self.num_envs, 2)), np.array(responses.reward), np.array(responses.done) infos = [{"raw_rewards": item} for item in reward] - return raw_obs, reward @ self.reward_weight, done[:,0], infos + return raw_obs, reward @ self.reward_weight, done[:, 0], infos def step(self, ac): self.step_async(ac) @@ -320,10 +354,10 @@ def getattr_depth_check(self, name, already_found): def render(self, mode="human"): if mode == "human": self.render_client.render(False) - elif mode == 'rgb_array': + elif mode == "rgb_array": bytes_array = np.array(self.render_client.render(True)) image = Image.frombytes("RGB", (640, 640), bytes_array) - return np.array(image)[:,:,::-1] + return np.array(image)[:, :, ::-1] def close(self): if jpype._jpype.isStarted(): @@ -377,13 +411,13 @@ def _allocate_shared_buffer(self, nbytes): def start_client(self): - from ts import JNIGridnetSharedMemVecClient as Client from ai.core import AI from rts import GameState + from ts import JNIGridnetSharedMemVecClient as Client self.num_feature_planes = GameState.numFeaturePlanes num_unit_types = len(self.real_utt.getUnitTypes()) - self.action_space_dims = [6, 4, 4, 4, 4, num_unit_types, (self.real_utt.getMaxAttackRange()*2+1)**2] + self.action_space_dims = [6, 4, 4, 4, 4, num_unit_types, (self.real_utt.getMaxAttackRange() * 2 + 1) ** 2] self.masks_dim = sum(self.action_space_dims) self.action_dim = len(self.action_space_dims) @@ -394,11 +428,11 @@ def start_client(self): action_mask_nbytes = self.num_envs * self.height * self.width * self.masks_dim * 4 action_mask_jvm_buffer, action_mask_np_buffer = self._allocate_shared_buffer(action_mask_nbytes) - self.action_mask = action_mask_np_buffer.reshape((self.num_envs, self.height*self.width, self.masks_dim)) + self.action_mask = action_mask_np_buffer.reshape((self.num_envs, self.height * self.width, self.masks_dim)) - action_nbytes = self.num_envs * self.width*self.height * self.action_dim * 4 + action_nbytes = self.num_envs * self.width * self.height * self.action_dim * 4 action_jvm_buffer, action_np_buffer = self._allocate_shared_buffer(action_nbytes) - self.actions = action_np_buffer.reshape((self.num_envs, self.height*self.width, self.action_dim)) + self.actions = action_np_buffer.reshape((self.num_envs, self.height * self.width, self.action_dim)) self.vec_client = Client( self.num_selfplay_envs, @@ -415,23 +449,25 @@ def start_client(self): action_jvm_buffer, 0, ) - self.render_client = self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + self.render_client = ( + self.vec_client.selfPlayClients[0] if len(self.vec_client.selfPlayClients) > 0 else self.vec_client.clients[0] + ) # get the unit type table self.utt = json.loads(str(self.render_client.sendUTT())) def reset(self): - self.vec_client.reset([0]*self.num_envs) + self.vec_client.reset([0] * self.num_envs) return self.obs def step_async(self, actions): - actions = actions.reshape((self.num_envs, self.width*self.height, self.action_dim)) + actions = 
actions.reshape((self.num_envs, self.width * self.height, self.action_dim)) np.copyto(self.actions, actions) def step_wait(self): - responses = self.vec_client.gameStep([0]*self.num_envs) + responses = self.vec_client.gameStep([0] * self.num_envs) reward, done = np.array(responses.reward), np.array(responses.done) infos = [{"raw_rewards": item} for item in reward] - return self.obs, reward @ self.reward_weight, done[:,0], infos + return self.obs, reward @ self.reward_weight, done[:, 0], infos def get_action_mask(self): self.vec_client.getMasks(0) diff --git a/gym_microrts/microrts_ai.py b/gym_microrts/microrts_ai.py index cf2be398..7b93e1c5 100644 --- a/gym_microrts/microrts_ai.py +++ b/gym_microrts/microrts_ai.py @@ -1,78 +1,114 @@ def randomBiasedAI(utt): from ai import RandomBiasedAI + return RandomBiasedAI() + def randomAI(utt): from ai import RandomBiasedSingleUnitAI + return RandomBiasedSingleUnitAI() + def passiveAI(utt): from ai import PassiveAI + return PassiveAI() + def workerRushAI(utt): from ai.abstraction import WorkerRush + return WorkerRush(utt) + def lightRushAI(utt): from ai.abstraction import LightRush + return LightRush(utt) + def POLightRush(utt): from ai.abstraction.partialobservability import POLightRush + return POLightRush(utt) + def POWorkerRush(utt): from ai.abstraction.partialobservability import POWorkerRush + return POWorkerRush(utt) + def POHeavyRush(utt): from ai.abstraction.partialobservability import POHeavyRush + return POHeavyRush(utt) + def PORangedRush(utt): from ai.abstraction.partialobservability import PORangedRush + return PORangedRush(utt) + + # Competition AIs + def coacAI(utt): from ai.coac import CoacAI + return CoacAI(utt) + def naiveMCTSAI(utt): from ai.mcts.naivemcts import NaiveMCTS + return NaiveMCTS(utt) + # /~https://github.com/AmoyZhp/MixedBotmRTS def mixedBot(utt): from ai.JZ import MixedBot + return MixedBot(utt) + # /~https://github.com/jr9Hernandez/RojoBot def rojo(utt): from ai.competition.rojobot import Rojo + return Rojo(utt) + # /~https://github.com/rubensolv/IzanagiBot def izanagi(utt): from ai.competition.IzanagiBot import Izanagi + return Izanagi(utt) + # /~https://github.com/jr9Hernandez/TiamatBot def tiamat(utt): from ai.competition.tiamat import Tiamat + return Tiamat(utt) + # /~https://github.com/zuozhiyang/Droplet/blob/master/GNS/Droplet.java def droplet(utt): from GNS import Droplet + return Droplet(utt) + # /~https://github.com/barvazkrav/mayariBot/blob/master/mayari.java def mayari(utt): from mayariBot import mayari + return mayari(utt) + # # /~https://github.com/zuozhiyang/MentalSeal # def mentalSeal(utt): # from MentalSeal import MentalSeal @@ -81,8 +117,10 @@ def mayari(utt): # /~https://github.com/rubensolv/GRojoA3N def guidedRojoA3N(utt): from ai.competition.GRojoA3N import GuidedRojoA3N + return GuidedRojoA3N(utt) + ALL_AIS = [ randomBiasedAI, randomAI, @@ -91,4 +129,4 @@ def guidedRojoA3N(utt): lightRushAI, coacAI, naiveMCTSAI, -] \ No newline at end of file +] diff --git a/gym_microrts/microrts_maps.py b/gym_microrts/microrts_maps.py index cfa6f477..34a2f2e9 100644 --- a/gym_microrts/microrts_maps.py +++ b/gym_microrts/microrts_maps.py @@ -18,4 +18,4 @@ "maps/16x16/basesWorkers16x16H.xml", "maps/16x16/basesWorkers16x16L.xml", "maps/16x16/EightBasesWorkers16x16.xml", -] \ No newline at end of file +] diff --git a/hello_world.py b/hello_world.py index dac3e8a0..999e9847 100644 --- a/hello_world.py +++ b/hello_world.py @@ -1,11 +1,13 @@ import numpy as np from numpy.random import choice -# if you want to record 
videos, install stable-baselines3 and use its `VecVideoRecorder` -# from stable_baselines3.common.vec_env import VecVideoRecorder from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv +# if you want to record videos, install stable-baselines3 and use its `VecVideoRecorder` +# from stable_baselines3.common.vec_env import VecVideoRecorder + + envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, num_bot_envs=1, @@ -13,29 +15,33 @@ render_theme=2, ai2s=[microrts_ai.coacAI for _ in range(1)], map_paths=["maps/16x16/basesWorkers16x16.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) # envs = VecVideoRecorder(envs, 'videos', record_video_trigger=lambda x: x % 4000 == 0, video_length=2000) + def softmax(x): """Compute softmax values for each sets of scores in x.""" e_x = np.exp(x - np.max(x)) return e_x / e_x.sum() + def sample(logits): # sample 1 or 2 from logits [0, 1 ,1, 0] but not 0 or 3 - if sum(logits) == 0: return 0 - return choice(range(len(logits)), p=logits/sum(logits)) + if sum(logits) == 0: + return 0 + return choice(range(len(logits)), p=logits / sum(logits)) + envs.action_space.seed(0) envs.reset() print(envs.action_plane_space.nvec) nvec = envs.action_space.nvec + def sample(logits): - return np.array( - [choice(range(len(item)), p=softmax(item)) for item in logits] - ).reshape(-1, 1) + return np.array([choice(range(len(item)), p=softmax(item)) for item in logits]).reshape(-1, 1) + for i in range(10000): envs.render() @@ -45,16 +51,19 @@ def sample(logits): # but we want to remove PyTorch as a core dependency... action_mask = envs.get_action_mask() action_mask = action_mask.reshape(-1, action_mask.shape[-1]) - action_type_mask = action_mask[:,0:6] - action = np.concatenate(( - sample(action_mask[:,0:6]), # action type - sample(action_mask[:,6:10]), # move parameter - sample(action_mask[:,10:14]), # harvest parameter - sample(action_mask[:,14:18]), # return parameter - sample(action_mask[:,18:22]), # produce_direction parameter - sample(action_mask[:,22:29]), # produce_unit_type parameter - sample(action_mask[:,29:sum(envs.action_space.nvec[1:])]), # attack_target parameter - ), axis=1) + action_type_mask = action_mask[:, 0:6] + action = np.concatenate( + ( + sample(action_mask[:, 0:6]), # action type + sample(action_mask[:, 6:10]), # move parameter + sample(action_mask[:, 10:14]), # harvest parameter + sample(action_mask[:, 14:18]), # return parameter + sample(action_mask[:, 18:22]), # produce_direction parameter + sample(action_mask[:, 22:29]), # produce_unit_type parameter + sample(action_mask[:, 29 : sum(envs.action_space.nvec[1:])]), # attack_target parameter + ), + axis=1, + ) action = np.array([envs.action_space.sample()]) next_obs, reward, done, info = envs.step(action) envs.close() diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 3ab9b22c..4abeff36 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -9,12 +9,11 @@ def test_ppo_gridnet(): shell=True, check=True, ) - except subprocess.CalledProcessError as grepexc: + except subprocess.CalledProcessError as grepexc: print("error code", grepexc.returncode, grepexc.output) assert grepexc.returncode in [0, 134] - def test_ppo_gridnet_eval_selfplay(): try: subprocess.run( @@ -22,7 +21,7 @@ def test_ppo_gridnet_eval_selfplay(): shell=True, check=True, ) - except subprocess.CalledProcessError as grepexc: + except subprocess.CalledProcessError as grepexc: print("error code", grepexc.returncode, 
grepexc.output) assert grepexc.returncode in [0, 134] diff --git a/tests/test_mask.py b/tests/test_mask.py index 414865f2..0d74fef3 100644 --- a/tests/test_mask.py +++ b/tests/test_mask.py @@ -1,11 +1,11 @@ import numpy as np - from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_mask(): envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, @@ -14,70 +14,271 @@ def test_mask(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() + envs.reset() # if render: envs.render() except Exception as e: e.printStackTrace() - num_planes = len(envs.action_plane_space.nvec) + len(envs.action_plane_space.nvec) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,1], - np.array([ - 1, 1, 1, 0, 1, 0, - 0, 1, 0, 0, - 0, 0, 0, 1, - 0, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 1], + np.array( + [ + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,4], - np.array([ - 1, 1, 1, 0, 1, 0, - 0, 0, 1, 0, - 1, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 4], + np.array( + [ + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0,5], - np.array([ - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 1, 1, 0, - 0, 0, 0, 1, 0, 0, 0, - # relative attack position below - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]).astype(np.int32), + np.array(envs.get_action_mask())[0, 5], + np.array( + [ + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + # relative attack position below + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + ).astype(np.int32), ) diff --git a/tests/test_observation.py b/tests/test_observation.py index b7eaf5f0..23baeee9 100644 --- a/tests/test_observation.py +++ b/tests/test_observation.py @@ -1,11 +1,10 @@ import numpy as np - -from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_observation(): envs = MicroRTSGridModeVecEnv( num_bot_envs=0, @@ -19,38 +18,146 @@ def test_observation(): ) next_obs = envs.reset() - resource = np.array([ - 0., 1., 0., 0., 0., # 1 hp - 0., 0., 0., 0., 1., # >= 4 resources - 1., 0., 0., # no owner - 0., 1., 0., 0., 0., 0., 0., 0., # unit type resource - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) - p1_worker = np.array([ - 0., 1., 0., 0., 0., # 1 hp - 1., 0., 0., 0., 0., # 0 resources - 0., 1., 0., # player 1 owns it - 0., 0., 0., 0., 1., 0., 0., 0., # unit type worker - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) - p1_base = np.array([ - 0., 0., 0., 0., 1., # 1 hp - 1., 0., 0., 0., 0., # 0 resources - 0., 1., 0., # player 1 owns it - 0., 0., 1., 0., 0., 0., 0., 0., # unit type base - 1., 0., 0., 0., 0., 0. # currently not executing actions - ]).astype(np.int32) + resource = np.array( + [ + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # 1 hp + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, # >= 4 resources + 1.0, + 0.0, + 0.0, # no owner + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type resource + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) + p1_worker = np.array( + [ + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # 1 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 0.0, + 1.0, + 0.0, # player 1 owns it + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, # unit type worker + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) + p1_base = np.array( + [ + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, # 1 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 0.0, + 1.0, + 0.0, # player 1 owns it + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type base + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) p2_worker = p1_worker.copy() - p2_worker[10:13] = np.array([0., 0., 1.,]) # player 2 owns it + p2_worker[10:13] = np.array( + [ + 0.0, + 0.0, + 1.0, + ] + ) # player 2 owns it p2_base = p1_base.copy() - p2_base[10:13] = np.array([0., 0., 1.,]) # player 2 owns it - empty_cell = np.array([ - 1., 0., 0., 0., 0., # 0 hp - 1., 0., 0., 0., 0., # 0 resources - 1., 0., 0., # no owner - 1., 0., 0., 0., 0., 0., 0., 0., # unit type empty cell - 1., 0., 0., 0., 0., 0. 
# currently not executing actions - ]).astype(np.int32) + p2_base[10:13] = np.array( + [ + 0.0, + 0.0, + 1.0, + ] + ) # player 2 owns it + empty_cell = np.array( + [ + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 hp + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, # 0 resources + 1.0, + 0.0, + 0.0, # no owner + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # unit type empty cell + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, # currently not executing actions + ] + ).astype(np.int32) # player 1's perspective np.testing.assert_array_equal(next_obs[0][0][0], resource) @@ -72,11 +179,9 @@ def test_observation(): # np.testing.assert_array_equal(next_obs[1][14][15], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0] np.testing.assert_array_equal(next_obs[1][14][14], p1_worker) np.testing.assert_array_equal(next_obs[1][13][13], p1_base) - feature_sum = 0 for item in [resource, resource, p1_worker, p1_base, resource, resource, p2_worker, p2_base]: feature_sum += item.sum() feature_sum += empty_cell.sum() * (256 - 8) assert next_obs.sum() == feature_sum * 2 == 2560.0 - diff --git a/tests/test_reward.py b/tests/test_reward.py index f1de2f6e..90314c59 100644 --- a/tests/test_reward.py +++ b/tests/test_reward.py @@ -1,11 +1,11 @@ import numpy as np - from gym_microrts import microrts_ai from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv render = False + def test_reward(): envs = MicroRTSGridModeVecEnv( num_selfplay_envs=0, @@ -14,37 +14,41 @@ def test_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # mine np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[1*num_planes:(1+1)*num_planes] = [2, 0, 3, 0, 0, 0, 0] + action[1 * num_planes : (1 + 1) * num_planes] = [2, 0, 3, 0, 0, 0, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() # wait for action to finish for _ in range(20): np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) envs.step(action) - if render: envs.render() + if render: + envs.render() # return np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[1*num_planes:(1+1)*num_planes] = [3, 0, 0, 2, 0, 0, 0] + action[1 * num_planes : (1 + 1) * num_planes] = [3, 0, 0, 2, 0, 0, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() + def test_produce_worker_reward(): envs = MicroRTSGridModeVecEnv( @@ -54,23 +58,24 @@ def test_produce_worker_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # produce a worker np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[5*num_planes:(5+1)*num_planes] = [4, 0, 0, 0, 1, 3, 
0] + action[5 * num_planes : (5 + 1) * num_planes] = [4, 0, 0, 0, 1, 3, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() def test_produce_buildings_reward(): @@ -81,20 +86,21 @@ def test_produce_buildings_reward(): render_theme=2, ai2s=[microrts_ai.passiveAI for _ in range(1)], map_paths=["maps/4x4/baseTwoWorkers4x4.xml"], - reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]) + reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) envs.action_space.seed(0) try: - obs = envs.reset() - if render: envs.render() + envs.reset() + if render: + envs.render() except Exception as e: e.printStackTrace() num_planes = len(envs.action_plane_space.nvec) - # produce a worker np.array(envs.get_action_mask()) action = np.zeros(len(envs.action_space.nvec), np.int32) - action[4*num_planes:(4+1)*num_planes] = [4, 0, 0, 0, 2, 2, 0] + action[4 * num_planes : (4 + 1) * num_planes] = [4, 0, 0, 0, 2, 2, 0] assert envs.step(action)[1].flatten() > 0 - if render: envs.render() + if render: + envs.render() From a75b3058ac240da6fa8031422bea3d5ef352b3c8 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 15:04:51 -0500 Subject: [PATCH 3/6] add black profile --- .pre-commit-config.yaml | 3 ++- experiments/league.py | 14 +++++++++++--- experiments/ppo_gridnet.py | 3 +-- gym_microrts/envs/vec_env.py | 32 ++++++++++++++++++-------------- pyproject.toml | 2 +- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dcc8ad9e..0f8d0b76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,8 @@ repos: hooks: - id: isort args: - - --skip wandb + - --profile=black + - --skip=wandb - repo: /~https://github.com/myint/autoflake rev: v1.4 hooks: diff --git a/experiments/league.py b/experiments/league.py index bb708928..aa315a94 100644 --- a/experiments/league.py +++ b/experiments/league.py @@ -12,9 +12,17 @@ import numpy as np import pandas as pd import torch -from peewee import (JOIN, CharField, DateTimeField, FloatField, - ForeignKeyField, Model, SmallIntegerField, SqliteDatabase, - fn) +from peewee import ( + JOIN, + CharField, + DateTimeField, + FloatField, + ForeignKeyField, + Model, + SmallIntegerField, + SqliteDatabase, + fn, +) from ppo_gridnet import Agent, MicroRTSStatsRecorder from stable_baselines3.common.vec_env import VecMonitor from trueskill import Rating, quality_1vs1, rate_1vs1 diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py index 4754c4b3..4e8945f1 100644 --- a/experiments/ppo_gridnet.py +++ b/experiments/ppo_gridnet.py @@ -13,8 +13,7 @@ import torch.nn as nn import torch.optim as optim from gym.spaces import MultiDiscrete -from stable_baselines3.common.vec_env import (VecEnvWrapper, VecMonitor, - VecVideoRecorder) +from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py index 2a509ae2..5bc1ec5f 100644 --- a/gym_microrts/envs/vec_env.py +++ b/gym_microrts/envs/vec_env.py @@ -82,13 +82,15 @@ def __init__( from rts.units import UnitTypeTable self.real_utt = UnitTypeTable() - from ai.rewardfunction import (AttackRewardFunction, - ProduceBuildingRewardFunction, - ProduceCombatUnitRewardFunction, - ProduceWorkerRewardFunction, - ResourceGatherRewardFunction, - RewardFunctionInterface, - WinLossRewardFunction) + from 
ai.rewardfunction import ( + AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction, + ) self.rfs = JArray(RewardFunctionInterface)( [ @@ -272,13 +274,15 @@ def __init__( from rts.units import UnitTypeTable self.real_utt = UnitTypeTable() - from ai.rewardfunction import (AttackRewardFunction, - ProduceBuildingRewardFunction, - ProduceCombatUnitRewardFunction, - ProduceWorkerRewardFunction, - ResourceGatherRewardFunction, - RewardFunctionInterface, - WinLossRewardFunction) + from ai.rewardfunction import ( + AttackRewardFunction, + ProduceBuildingRewardFunction, + ProduceCombatUnitRewardFunction, + ProduceWorkerRewardFunction, + ResourceGatherRewardFunction, + RewardFunctionInterface, + WinLossRewardFunction, + ) self.rfs = JArray(RewardFunctionInterface)( [ diff --git a/pyproject.toml b/pyproject.toml index c31cf1f1..c9ee721c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,4 +42,4 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.extras] spyder = ["spyder"] -cleanrl = ["cleanrl"] \ No newline at end of file +cleanrl = ["cleanrl"] From bcbe3beeef7784d808d419ae91385c19ad6f18ae Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Wed, 26 Jan 2022 17:12:06 -0500 Subject: [PATCH 4/6] revert formatting --- tests/test_mask.py | 305 +++++++------------------------------- tests/test_observation.py | 170 ++++----------------- 2 files changed, 85 insertions(+), 390 deletions(-) diff --git a/tests/test_mask.py b/tests/test_mask.py index 0d74fef3..71c63007 100644 --- a/tests/test_mask.py +++ b/tests/test_mask.py @@ -24,261 +24,62 @@ def test_mask(): e.printStackTrace() len(envs.action_plane_space.nvec) + # fmt: off np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 1], - np.array( - [ - 1, - 1, - 1, - 0, - 1, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,1], + np.array([ + 1, 1, 1, 0, 1, 0, + 0, 1, 0, 0, + 0, 0, 0, 1, + 0, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 4], - np.array( - [ - 1, - 1, - 1, - 0, - 1, - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,4], + np.array([ + 1, 1, 1, 0, 1, 0, + 0, 0, 1, 0, + 1, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) np.testing.assert_array_equal( - np.array(envs.get_action_mask())[0, 5], - np.array( - [ - 1, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - # relative attack position below - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - ).astype(np.int32), + np.array(envs.get_action_mask())[0,5], + np.array([ + 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 1, 1, 0, + 0, 0, 0, 1, 0, 0, 0, + # relative attack position below + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]).astype(np.int32), ) + # fmt: on diff --git a/tests/test_observation.py b/tests/test_observation.py index 23baeee9..29c311ba 100644 --- a/tests/test_observation.py +++ b/tests/test_observation.py @@ -17,147 +17,41 @@ def test_observation(): reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]), ) + # fmt: off next_obs = envs.reset() - resource = np.array( - [ - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # 1 hp - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, # >= 4 resources - 1.0, - 0.0, - 0.0, # no owner - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type resource - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) - p1_worker = np.array( - [ - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # 1 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 0.0, - 1.0, - 0.0, # player 1 owns it - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, # unit type worker - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) - p1_base = np.array( - [ - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, # 1 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 0.0, - 1.0, - 0.0, # player 1 owns it - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type base - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) + resource = np.array([ + 0., 1., 0., 0., 0., # 1 hp + 0., 0., 0., 0., 1., # >= 4 resources + 1., 0., 0., # no owner + 0., 1., 0., 0., 0., 0., 0., 0., # unit type resource + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + p1_worker = np.array([ + 0., 1., 0., 0., 0., # 1 hp + 1., 0., 0., 0., 0., # 0 resources + 0., 1., 0., # player 1 owns it + 0., 0., 0., 0., 1., 0., 0., 0., # unit type worker + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + p1_base = np.array([ + 0., 0., 0., 0., 1., # 1 hp + 1., 0., 0., 0., 0., # 0 resources + 0., 1., 0., # player 1 owns it + 0., 0., 1., 0., 0., 0., 0., 0., # unit type base + 1., 0., 0., 0., 0., 0. 
# currently not executing actions + ]).astype(np.int32) p2_worker = p1_worker.copy() - p2_worker[10:13] = np.array( - [ - 0.0, - 0.0, - 1.0, - ] - ) # player 2 owns it + p2_worker[10:13] = np.array([0., 0., 1.,]) # player 2 owns it p2_base = p1_base.copy() - p2_base[10:13] = np.array( - [ - 0.0, - 0.0, - 1.0, - ] - ) # player 2 owns it - empty_cell = np.array( - [ - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 hp - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, # 0 resources - 1.0, - 0.0, - 0.0, # no owner - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # unit type empty cell - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, # currently not executing actions - ] - ).astype(np.int32) + p2_base[10:13] = np.array([0., 0., 1.,]) # player 2 owns it + empty_cell = np.array([ + 1., 0., 0., 0., 0., # 0 hp + 1., 0., 0., 0., 0., # 0 resources + 1., 0., 0., # no owner + 1., 0., 0., 0., 0., 0., 0., 0., # unit type empty cell + 1., 0., 0., 0., 0., 0. # currently not executing actions + ]).astype(np.int32) + # fmt: on # player 1's perspective np.testing.assert_array_equal(next_obs[0][0][0], resource) From bdc2e4af8f0bae7073e6ddd12c4ba4c00a797dfd Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 27 Jan 2022 11:36:17 -0500 Subject: [PATCH 5/6] Add precommit CI --- .github/workflows/pre-commit.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/pre-commit.yml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..0e138fde --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,24 @@ +name: Pre-commit + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ main ] +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.9] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - uses: pre-commit/action@v2.0.3 From 2e2786d9f5ce6e4610099bb922c2171500113108 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 27 Jan 2022 11:39:00 -0500 Subject: [PATCH 6/6] change names --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/pypi.yml | 2 +- .github/workflows/tests.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 0e138fde..9176620a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: Pre-commit +name: pre-commit on: push: diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index a1b6cb16..4625d8e3 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,4 +1,4 @@ -name: build +name: build and distribute on: push: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0c923067..ce900919 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Tests +name: tests on: push:
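
A note on the `# fmt: off` / `# fmt: on` markers introduced in PATCH 4/6: Black leaves any region between those two comments untouched, so the hand-aligned one-hot rows in tests/test_mask.py and tests/test_observation.py can keep one semantic group per line while the rest of the repository stays auto-formatted. A minimal, self-contained sketch of the pattern (the values mirror the `empty_cell` vector from tests/test_observation.py; the trailing asserts are illustrative only and are not part of the patch series):

    import numpy as np

    # fmt: off
    empty_cell = np.array([
        1., 0., 0., 0., 0.,              # 0 hp
        1., 0., 0., 0., 0.,              # 0 resources
        1., 0., 0.,                      # no owner
        1., 0., 0., 0., 0., 0., 0., 0.,  # unit type: empty cell
        1., 0., 0., 0., 0., 0.,          # currently not executing actions
    ]).astype(np.int32)
    # fmt: on

    # 27 feature planes per cell, exactly one hot bit per group.
    assert empty_cell.shape == (27,)
    assert empty_cell.sum() == 5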
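
The hand-written masks reverted in PATCH 4/6 also document the per-cell action layout: the comment groups line up as action type (6), move direction (4), harvest direction (4), return direction (4), produce direction (4), produce unit type (7), and a 7 x 7 relative attack position grid (49), for 78 mask entries per cell, which matches the seven-component action slices such as [2, 0, 3, 0, 0, 0, 0] used in tests/test_reward.py. Below is a small sketch of how such a flat mask could be split back into those components; the component sizes are inferred from the tests above rather than taken from the gym_microrts API, so treat them as an assumption:

    import numpy as np

    # Component sizes inferred from the hand-written masks in tests/test_mask.py:
    # action type (6), move dir (4), harvest dir (4), return dir (4),
    # produce dir (4), produce unit type (7), relative attack position (7 * 7 = 49).
    NVEC = np.array([6, 4, 4, 4, 4, 7, 49])

    def split_cell_mask(flat_mask):
        """Split one cell's flat 78-entry mask into its 7 per-component segments."""
        flat_mask = np.asarray(flat_mask)
        assert flat_mask.shape == (NVEC.sum(),)
        # np.cumsum(NVEC)[:-1] gives the split points between consecutive components.
        return np.split(flat_mask, np.cumsum(NVEC)[:-1])

    # Example with an all-zero (fully masked) cell:
    segments = split_cell_mask(np.zeros(78, dtype=np.int32))
    assert [len(s) for s in segments] == [6, 4, 4, 4, 4, 7, 49]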