Spaces:
Sleeping
Sleeping
import pytest | |
import numpy as np | |
from easydict import EasyDict | |
from ding.utils import set_pkg_seed | |
from dizoo.mujoco.envs import MujocoDiscEnv | |
def test_mujoco_env_eval_episode_return():
    """Check that summed step rewards match the episode return reported by the env.

    Builds a ``MujocoDiscEnv`` for ``Ant-v3``, steps it with random discrete
    actions until ``timestep.done`` is set, and asserts that the locally
    accumulated reward agrees with ``timestep.info['eval_episode_return']``
    within a relative tolerance of 1e-5.
    """
    set_pkg_seed(1234, use_cuda=False)
    each_dim_disc_size = 2
    env = MujocoDiscEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'each_dim_disc_size': each_dim_disc_size,
                'delay_reward_step': 4,
                'save_replay_gif': False,
                'replay_path_gif': None,
            }
        )
    )
    # Always release the environment, even if stepping or the assertion fails.
    try:
        env.seed(1234)
        env.reset()
        action_dim = env._raw_action_space.shape
        # Exclusive upper bound for the random action index; loop-invariant,
        # so compute it once instead of on every step.
        num_discrete_actions = each_dim_disc_size ** action_dim[0]
        eval_episode_return = np.array([0.], dtype=np.float32)
        while True:
            action = np.random.randint(0, num_discrete_actions, 1)
            timestep = env.step(action)
            eval_episode_return += timestep.reward
            if timestep.done:
                print(
                    "{}({}), {}({})".format(
                        timestep.info['eval_episode_return'], type(timestep.info['eval_episode_return']),
                        eval_episode_return, type(eval_episode_return)
                    )
                )
                # The locally summed timestep.reward and the cumulative value
                # reported by the env are not exactly the same, so compare
                # with a relative tolerance. The multiplicative form avoids a
                # ZeroDivisionError when the reported return is exactly zero.
                reported = timestep.info['eval_episode_return'].item()
                accumulated = eval_episode_return.item()
                assert abs(reported - accumulated) <= 1e-5 * abs(reported), \
                    "reported return {} != accumulated return {}".format(reported, accumulated)
                break
    finally:
        env.close()