Merge pull request #14 from opendilab/gl-dev
v0.3.2 pull request
RobinC94 authored Apr 25, 2022
2 parents cc7f47b + c29ebcd commit ab21597
Showing 18 changed files with 97 additions and 68 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG
@@ -1,3 +1,14 @@
## v0.3.2 (2022.4.24)
- Update banner logo
- Update to DI-engine 0.3, modify env properties
- Fix Basic policy bug
- Fix CILRS model bug
- Fix bugs in evaluator
- Fix bug in ppo config
- Fix bug in simple rl pipeline
- Add pre-trained weights path in docs


## v0.3.1 (2022.2.25)
- Add MetaDrive Macro Env
- Add common models
4 changes: 2 additions & 2 deletions README.md
@@ -1,8 +1,8 @@
# DI-drive

<img src="./docs/figs/di-drive_logo.png" width="200" alt="icon"/>
<img src="./docs/figs/di-drive_banner.png" alt="icon"/>

Updated on 2022.2.25 DI-drive-v0.3.1 (beta)
Updated on 2022.4.16 DI-drive-v0.3.2 (beta)

DI-drive - Decision Intelligence Platform for Autonomous Driving simulation.

2 changes: 1 addition & 1 deletion core/__init__.py
@@ -1,5 +1,5 @@
__TITLE__ = "DI-drive"
__VERSION__ = "0.3.1"
__VERSION__ = "0.3.2"
__DESCRIPTION__ = "Decision AI Auto-Driving Platform"
__AUTHOR__ = "OpenDILab Contributors"
__AUTHOR_EMAIL__ = "opendilab.contact@gmail.com"
33 changes: 3 additions & 30 deletions core/envs/drive_env_wrapper.py
@@ -9,7 +9,7 @@
from core.eval.carla_benchmark_evaluator import get_suites_list, read_pose_txt, get_benchmark_dir
from .base_drive_env import BaseDriveEnv
from ding.utils.default_helper import deep_merge_dicts
from ding.envs.env.base_env import BaseEnvTimestep, BaseEnvInfo
from ding.envs.env.base_env import BaseEnvTimestep
from ding.envs.common.env_element import EnvElementInfo
from ding.torch_utils.data_helper import to_ndarray

@@ -37,6 +37,8 @@ def __init__(self, env: BaseDriveEnv, cfg: Dict = None, **kwargs) -> None:
else:
self._cfg = cfg
self.env = env
if not hasattr(self.env, 'reward_space'):
self.reward_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(1, ))

def reset(self, *args, **kwargs) -> Any:
"""
@@ -83,35 +85,6 @@ def seed(self, seed: int, dynamic_seed: bool = True) -> None:
self._dynamic_seed = dynamic_seed
np.random.seed(self._seed)

def info(self) -> BaseEnvInfo:
"""
Interface of ``info`` method to suit DI-engine format env.
It returns a namedtuple ``BaseEnvInfo`` defined in DI-engine
which contains information about observation, action and reward space.
:Returns:
BaseEnvInfo: Env information instance defined in DI-engine.
"""
obs_space = EnvElementInfo(shape=self.env.observation_space, value={'min': 0., 'max': 1., 'dtype': np.float32})
act_space = EnvElementInfo(
shape=self.env.action_space,
value={
'min': np.float32("-inf"),
'max': np.float32("inf"),
'dtype': np.float32
},
)
rew_space = EnvElementInfo(
shape=1,
value={
'min': np.float32("-inf"),
'max': np.float32("inf")
},
)
return BaseEnvInfo(
agent_num=1, obs_space=obs_space, act_space=act_space, rew_space=rew_space, use_wrappers=None
)

def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
if replay_path is None:
replay_path = './video'
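Note: with the `info()`/`BaseEnvInfo` block removed, the wrapper now advertises a gym-style `reward_space`, matching the DI-engine 0.3 property change listed in the changelog. A minimal, standalone sketch of how that fallback space behaves (gym API only; the sample reward value is made up):

```python
import gym
import numpy as np

# The same unbounded 1-dimensional space the wrapper registers when the
# wrapped env does not define one itself.
reward_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(1, ), dtype=np.float32)

# Any scalar reward packed as a float32 array of shape (1,) lies inside it.
reward = np.array([0.75], dtype=np.float32)
assert reward_space.contains(reward)
print(reward_space.shape)  # (1,)
```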
1 change: 1 addition & 0 deletions core/envs/scenario_carla_env.py
@@ -36,6 +36,7 @@ class ScenarioCarlaEnv(BaseDriveEnv):

action_space = spaces.Dict({})
observation_space = spaces.Dict({})
reward_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(1, ))
config = dict(
simulator=dict(),
# reward value if success
1 change: 1 addition & 0 deletions core/envs/simple_carla_env.py
@@ -38,6 +38,7 @@ class SimpleCarlaEnv(BaseDriveEnv):
metadata = {'render.modes': ['rgb_array']}
action_space = spaces.Dict({})
observation_space = spaces.Dict({})
reward_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(1, ))
reward_type = ['goal', 'distance', 'speed', 'angle', 'steer', 'lane', 'failure']
config = dict(
simulator=dict(),
2 changes: 1 addition & 1 deletion core/eval/serial_evaluator.py
@@ -106,7 +106,7 @@ def should_eval(self, train_iter: int) -> bool:
:Returns:
bool: Whether should run iteration
"""
if (train_iter - self._last_eval_iter) < self._cfg.eval_freq and train_iter != 0:
if (train_iter - self._last_eval_iter) < self._cfg.eval_freq and train_iter > 0:
return False
self._last_eval_iter = train_iter
return True
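Note: the `!= 0` → `> 0` change above interacts with the `-1` sentinel that the simple RL pipeline writes via `ctx.setdefault("train_iter", -1)`; with the old comparison, that sentinel could suppress the very first evaluation. A self-contained sketch of the corrected logic (hypothetical class, mirroring only this method):

```python
class ShouldEvalSketch:
    """Mimics SerialEvaluator.should_eval with the corrected comparison."""

    def __init__(self, eval_freq: int = 100):
        self._eval_freq = eval_freq
        self._last_eval_iter = -1

    def should_eval(self, train_iter: int) -> bool:
        # `train_iter > 0` (rather than `!= 0`) lets a -1 sentinel passed before
        # the first training step still trigger an initial evaluation.
        if (train_iter - self._last_eval_iter) < self._eval_freq and train_iter > 0:
            return False
        self._last_eval_iter = train_iter
        return True


checker = ShouldEvalSketch(eval_freq=100)
print(checker.should_eval(-1))   # True: initial evaluation before any training
print(checker.should_eval(50))   # False: only 51 iterations since the last eval
print(checker.should_eval(150))  # True: eval_freq reached
```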
3 changes: 2 additions & 1 deletion core/models/cilrs_model.py
@@ -41,6 +41,7 @@ def __init__(
nn.ReLU(True),
nn.Linear(hidden_size, embedding_dim),
)
embedding_dim *= 2

# Project feature to speed prediction
if predict_speed:
@@ -94,7 +95,7 @@ def forward(self, embedding, speed, command):
if self._input_speed:
if len(speed.shape) == 1:
speed = speed.unsqueeze(1)
embedding += self._speed_in(speed)
embedding = torch.cat([embedding, self._speed_in(speed)], 1)

control_pred = 0.
for i, branch in enumerate(self._branches):
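Note: the CILRS fix above stops adding the speed embedding into the image embedding in place and concatenates the two instead, which is why `embedding_dim` is doubled in `__init__`. A standalone sketch of the joining step with made-up dimensions:

```python
import torch

batch, embedding_dim = 4, 512
image_embedding = torch.randn(batch, embedding_dim)  # perception backbone output
speed_embedding = torch.randn(batch, embedding_dim)  # output of the speed branch (self._speed_in)

# Old behaviour: element-wise add, collapsing both feature sets into 512 dims.
added = image_embedding + speed_embedding

# Fixed behaviour: concatenate along the feature axis; the control branches then
# consume a 2 * embedding_dim vector, hence `embedding_dim *= 2` above.
joint = torch.cat([image_embedding, speed_embedding], dim=1)
print(added.shape, joint.shape)  # torch.Size([4, 512]) torch.Size([4, 1024])
```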
9 changes: 9 additions & 0 deletions core/policy/base_carla_policy.py
@@ -72,6 +72,15 @@ def _get_train_sample(self, data: Any) -> Optional[List]:
data = list(data)
return data

def _state_dict_learn(self) -> Dict[str, Any]:
return {'model': self._model.state_dict()}

def _state_dict_collect(self) -> Dict[str, Any]:
return {'model': self._model.state_dict()}

def _state_dict_eval(self) -> Dict[str, Any]:
return {'model': self._model.state_dict()}

def _load_state_dict_learn(self, state_dict: Dict[str, Any]) -> None:
self._model.load_state_dict(state_dict['model'], strict=True)

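Note: the three `_state_dict_*` hooks added above mirror the existing `_load_state_dict_learn`, giving the DI-engine learner, collector and evaluator something to serialize when they checkpoint the policy. A rough in-memory round-trip sketch (hypothetical tiny model; only the model weights are saved, as in the diff):

```python
from typing import Any, Dict

import torch.nn as nn


class PolicyStateDictSketch:
    """Minimal stand-in for BaseCarlaPolicy's new checkpoint hooks."""

    def __init__(self, model: nn.Module):
        self._model = model

    def _state_dict_learn(self) -> Dict[str, Any]:
        # Only the model weights are exported; optimizer state would go here too if needed.
        return {'model': self._model.state_dict()}

    def _load_state_dict_learn(self, state_dict: Dict[str, Any]) -> None:
        self._model.load_state_dict(state_dict['model'], strict=True)


policy = PolicyStateDictSketch(nn.Linear(8, 2))  # hypothetical tiny model
ckpt = policy._state_dict_learn()                # what a save_checkpoint hook would serialize
policy._load_state_dict_learn(ckpt)              # restores the weights without error
```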
10 changes: 8 additions & 2 deletions demo/cilrs/cilrs_train.py
@@ -63,7 +63,10 @@ def train(policy, optimizer, loader, tb_logger=None, start_iter=0):
optimizer.step()
log_vars['cur_lr'] = optimizer.defaults['lr']
for k, v in log_vars.items():
loss_epoch[k] += [log_vars[k].item()]
if isinstance(v, torch.Tensor):
loss_epoch[k] += [log_vars[k].item()]
else:
loss_epoch[k] += [log_vars[k]]
if iter_num % 50 == 0 and tb_logger is not None:
tb_logger.add_scalar("train_iter/" + k, v, iter_num)
iter_num += 1
@@ -78,7 +81,10 @@ def validate(policy, loader, tb_logger=None, epoch=0):
with torch.no_grad():
log_vars = policy.forward(data)
for k in list(log_vars.keys()):
loss_epoch[k] += [log_vars[k]]
if isinstance(log_vars[k], torch.Tensor):
loss_epoch[k] += [log_vars[k].item()]
else:
loss_epoch[k] += [log_vars[k]]
loss_epoch = {k: np.mean(v) for k, v in loss_epoch.items()}
if tb_logger is not None:
for k, v in loss_epoch.items():
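Note: the guarded accumulation above is needed because `log_vars` now mixes tensors with plain Python numbers (e.g. the `cur_lr` float added a few lines earlier), and calling `.item()` on a float raises `AttributeError`. A short standalone sketch of the guard, assuming only the shape of the log dictionary:

```python
from collections import defaultdict

import numpy as np
import torch

loss_epoch = defaultdict(list)
log_vars = {
    'total_loss': torch.tensor(0.42),  # tensors must be converted with .item()
    'cur_lr': 1e-4,                    # plain float: .item() would fail here
}

for k, v in log_vars.items():
    if isinstance(v, torch.Tensor):
        loss_epoch[k] += [v.item()]
    else:
        loss_epoch[k] += [v]

print({k: np.mean(v) for k, v in loss_epoch.items()})
# {'total_loss': 0.42..., 'cur_lr': 0.0001}
```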
6 changes: 4 additions & 2 deletions demo/simple_rl/config/ppo_config.py
@@ -78,8 +78,10 @@
),
collect=dict(
# n_sample=3000,
collector=dict(
n_episode=8,
collector=dict(
collect_print_freq=1000,
get_train_sample=True,
deepcopy_obs=True,
transform_obs=True,
),
@@ -97,4 +99,4 @@
),
)

default_train_config = EasyDict(ppo_config)
default_train_config = EasyDict(ppo_config)
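Note: the PPO config now drives episode-based collection (`n_episode=8` replaces the commented-out `n_sample`) and switches on `get_train_sample`, `deepcopy_obs` and `transform_obs` inside the collector block. A condensed sketch of the resulting structure, restricted to the keys visible in this diff (the surrounding `ppo_config` has many more entries):

```python
from easydict import EasyDict

ppo_collect_cfg = EasyDict(dict(
    collect=dict(
        n_episode=8,                 # episode-based collection for on-policy PPO
        collector=dict(
            collect_print_freq=1000,
            get_train_sample=True,   # presumably lets the collector cut episodes into training samples
            deepcopy_obs=True,
            transform_obs=True,
        ),
    ),
))

print(ppo_collect_cfg.collect.n_episode)                   # 8
print(ppo_collect_cfg.collect.collector.get_train_sample)  # True
```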
56 changes: 37 additions & 19 deletions demo/simple_rl/simple_rl_train.py
@@ -11,7 +11,8 @@
from core.eval import SerialEvaluator
from ding.envs import SyncSubprocessEnvManager, BaseEnvManager
from ding.policy import DQNPolicy, PPOPolicy, TD3Policy, SACPolicy, DDPGPolicy
from ding.worker import BaseLearner, SampleSerialCollector, AdvancedReplayBuffer, NaiveReplayBuffer
from ding.worker import BaseLearner, SampleSerialCollector, EpisodeSerialCollector, AdvancedReplayBuffer, \
NaiveReplayBuffer
from ding.utils import set_pkg_seed
from ding.rl_utils import get_epsilon_greedy_fn
from ding.framework import Task
@@ -56,15 +57,16 @@ def get_cfg(args):
'ddpg': NaiveReplayBuffer,
}[args.policy]
cfg = compile_config(
cfg = default_train_config,
env_manager = SyncSubprocessEnvManager,
policy = use_policy,
learner = BaseLearner,
collector = SampleSerialCollector,
buffer = use_buffer,
cfg=default_train_config,
env_manager=SyncSubprocessEnvManager,
policy=use_policy,
learner=BaseLearner,
collector=SampleSerialCollector,
buffer=use_buffer,
)
return cfg


def get_cls(spec):
policy_cls, model_cls = {
'dqn': (DQNPolicy, DQNRLModel),
@@ -76,11 +78,11 @@ def get_cls(spec):

return policy_cls, model_cls


def evaluate(task, evaluator, learner):
def _evaluate(ctx):
ctx.setdefault("envstep", -1) # Avoid attribute not existing
ctx.setdefault("train_iter", -1)
if evaluator.should_eval(ctx.train_iter):
if evaluator.should_eval(learner.train_iter):
stop, rate = evaluator.eval(learner.save_checkpoint, learner.train_iter, ctx.envstep)
if stop:
task.finish = True
@@ -98,15 +100,17 @@ def _collect(ctx):
new_data = collector.collect(train_iter=ctx.train_iter)
ctx.update_per_collect = len(new_data) // cfg.policy.learn.batch_size * 4
replay_buffer.push(new_data, cur_collector_envstep=collector.envstep)
ctx.envstep = collector.envstep
return _collect


def on_policy_collect(collector):
def _collect(ctx):
ctx.setdefault("train_iter", -1)
new_data = collector.collect(n_sample=3000, train_iter=ctx.train_iter)
new_data = collector.collect(train_iter=ctx.train_iter)
unpack_birdview(new_data)
ctx.new_data = new_data
ctx.envstep = collector.envstep
return _collect


@@ -128,6 +132,7 @@ def _train(ctx):
learner.train(train_data, ctx.envstep)
if cfg.policy.get('priority', False):
replay_buffer.update(learner.priority_info)
ctx.train_iter = learner.train_iter
return _train
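Note: these middleware changes thread progress counters through the shared `ctx`: both collect variants now record `ctx.envstep`, `_train` records `ctx.train_iter`, and the evaluate step reads `learner.train_iter` directly. A toy, framework-free sketch of that hand-off pattern (the real loop is driven by DI-engine's `Task.run`):

```python
class Ctx(dict):
    """Toy stand-in for the shared task context passed between middleware."""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__


def collect_step(ctx):
    ctx.setdefault('train_iter', -1)             # sentinel before any training
    # ... collect transitions here ...
    ctx.envstep = ctx.get('envstep', 0) + 300    # collector.envstep in the real pipeline


def train_step(ctx):
    # ... run learner updates here ...
    ctx.train_iter = ctx.get('train_iter', -1) + 1   # learner.train_iter in the real pipeline


ctx = Ctx()
for _ in range(3):                  # Task.run(max_step=...) drives this loop for real
    collect_step(ctx)
    train_step(ctx)
print(ctx.envstep, ctx.train_iter)  # 900 2
```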


@@ -150,9 +155,12 @@ def main(args, seed=0):
cfg=cfg.env.manager.collect,
)
evaluate_env = SyncSubprocessEnvManager(
env_fn=[partial(wrapped_env, cfg.env, cfg.env.wrapper.eval, *tcp_list[collector_env_num + i]) for i in range(evaluator_env_num)],
env_fn=[
partial(wrapped_env, cfg.env, cfg.env.wrapper.eval, *tcp_list[collector_env_num + i])
for i in range(evaluator_env_num)
],
cfg=cfg.env.manager.eval,
)
)
# Uncomment this to add save replay when evaluation
# evaluate_env.enable_save_replay(cfg.env.replay_path)

@@ -166,15 +174,25 @@

tb_logger = SummaryWriter('./log/{}/'.format(cfg.exp_name))
learner = BaseLearner(cfg.policy.learn.learner, policy.learn_mode, tb_logger, exp_name=cfg.exp_name)
collector = SampleSerialCollector(cfg.policy.collect.collector, collector_env, policy.collect_mode, tb_logger, exp_name=cfg.exp_name)
evaluator = SerialEvaluator(cfg.policy.eval.evaluator, evaluate_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name)
collector = SampleSerialCollector(
cfg.policy.collect.collector, collector_env, policy.collect_mode, tb_logger, exp_name=cfg.exp_name
)
evaluator = SerialEvaluator(
cfg.policy.eval.evaluator, evaluate_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name
)

if args.policy != 'ppo':
collector = SampleSerialCollector(
cfg.policy.collect.collector, collector_env, policy.collect_mode, tb_logger, exp_name=cfg.exp_name
)
if cfg.policy.get('priority', False):
replay_buffer = AdvancedReplayBuffer(cfg.policy.other.replay_buffer, tb_logger, exp_name=cfg.exp_name)
else:
replay_buffer = NaiveReplayBuffer(cfg.policy.other.replay_buffer, tb_logger, exp_name=cfg.exp_name)
else:
collector = EpisodeSerialCollector(
cfg.policy.collect.collector, collector_env, policy.collect_mode, tb_logger, exp_name=cfg.exp_name
)
replay_buffer = None

if args.policy == 'dqn':
@@ -192,25 +210,25 @@
else:
new_data = collector.collect(n_sample=cfg.policy.random_collect_size)
replay_buffer.push(new_data, cur_collector_envstep=collector.envstep)

with Task(async_mode=args.use_async) as task:
task.use_step_wrapper(StepTimer(print_per_step=1))
task.use_step_wrapper(StepTimer(print_per_step=1))
task.use(evaluate(task, evaluator, learner))
if replay_buffer is None:
task.use(on_policy_collect(collector))
else:
task.use(off_policy_collect(epsilon_greedy, collector, replay_buffer, cfg))
task.use(train(learner, replay_buffer, cfg))
task.run(max_step=int(1e8))

learner.call_hook('after_run')

collector.close()
evaluator.close()
learner.close()
if args.policy != 'ppo':
replay_buffer.close()

print('finish')


@@ -220,6 +238,6 @@ def main(args, seed=0):
parser.add_argument('-p', '--policy', default='dqn', choices=['dqn', 'ppo', 'td3', 'sac', 'ddpg'], help='RL policy')
parser.add_argument('-d', '--ding-cfg', default=None, help='DI-engine config path')
parser.add_argument('--use-async', action='store_true', help='whether use asynchronous execution mode')

args = parser.parse_args()
main(args)
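Note: after these changes, choosing `-p ppo` builds an `EpisodeSerialCollector` with no replay buffer, while the other policies keep `SampleSerialCollector` plus an advanced or naive replay buffer. A hedged usage sketch of launching the PPO branch (assumes the repository checkout, DI-engine, and a running simulator are set up as the README describes):

```python
import subprocess

# Illustrative invocation only; the entry-point path is the file shown in this diff
# and '--policy' comes from the argparse options above.
subprocess.run([
    'python', 'demo/simple_rl/simple_rl_train.py',
    '--policy', 'ppo',   # selects the episode collector / no-replay-buffer branch
])
```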
Binary file added docs/figs/di-drive_banner.png
6 changes: 3 additions & 3 deletions docs/source/conf.py
@@ -23,8 +23,8 @@
author = 'OpenDILab'

# The full version, including alpha/beta/rc tags
version = '0.3.1'
release = '0.3.1'
version = '0.3.2'
release = '0.3.2'


# -- General configuration ---------------------------------------------------
@@ -78,4 +78,4 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ['_static']
3 changes: 1 addition & 2 deletions docs/source/index.rst
@@ -9,9 +9,8 @@ DI-drive Documentation
.. toctree::
:maxdepth: 2

.. figure:: ../figs/di-drive_logo.png
.. figure:: ../figs/di-drive_banner.png
:alt: DI-drive
:width: 300px

Decision Intelligence Platform for Autonomous Driving simulation.

3 changes: 3 additions & 0 deletions docs/source/model_zoo/cict.rst
@@ -333,6 +333,9 @@ velocity loss 0.44275
acceleration loss 3.52583
================== =========

We provide pre-trained weights under these configs. You can download them using this
`link <http://opendilab.org/download/DI-drive/cict/il/>`_

Benchmark Evaluating
=======================
