gym-ple
Can't use FlappyBird environment.
Question
I am installing pygame and gym_ple using the following two commands:

!pip install git+https://github.com/GrupoTuring/PyGame-Learning-Environment
!pip install git+https://github.com/lusob/gym-ple

I am doing the following imports (they are part of a bigger project):

import copy
import torch
import random
import gym
import gym_ple
import matplotlib
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from collections import deque, namedtuple
from IPython.display import HTML
from base64 import b64encode
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import IterableDataset
from torch.optim import AdamW
from pytorch_lightning import LightningModule, Trainer
from gym.wrappers import TransformObservation

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
num_gpus = torch.cuda.device_count()
Then I run the following lines of code:

env = gym_ple.make("FlappyBird-v0")
env.step(env.action_space.sample())
And I get the following error:
NotImplementedError                       Traceback (most recent call last)
Cell In[25], line 1
----> 1 env.step(env.action_space.sample())

File d:\anaconda3\Lib\site-packages\gym\wrappers\time_limit.py:17, in TimeLimit.step(self, action)
     16 def step(self, action):
---> 17     observation, reward, done, info = self.env.step(action)
     18     self._elapsed_steps += 1
     19     if self._elapsed_steps >= self._max_episode_steps:

File d:\anaconda3\Lib\site-packages\gym\wrappers\order_enforcing.py:13, in OrderEnforcing.step(self, action)
     11 def step(self, action):
     12     assert self._has_reset, "Cannot call env.step() before calling reset()"
---> 13     observation, reward, done, info = self.env.step(action)
     14     return observation, reward, done, info

File d:\anaconda3\Lib\site-packages\gym\core.py:80, in Env.step(self, action)
     63 @abstractmethod
     64 def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
     65     """Run one timestep of the environment's dynamics. When end of
     66     episode is reached, you are responsible for calling reset()
     67     to reset this environment's state.
    (...)
     78     info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning)
     79     """
---> 80     raise NotImplementedError

NotImplementedError:
I went into the site-packages directory inside my anaconda3 folder and found the ple and gym_ple folders, as well as the file for the FlappyBird game inside the ple folder. Please help me understand what is wrong.
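In case it helps with the diagnosis, a minimal check along the lines below should show which gym version is installed and which classes the environment actually resolves to (just a sketch; I am not sure which of these details matter):

import gym
import gym_ple

print(gym.__version__)                    # installed gym version
env = gym_ple.make("FlappyBird-v0")
print(type(env))                          # outermost wrapper (TimeLimit, per the traceback)
print(type(env.unwrapped))                # the underlying PLE environment class
print(env.observation_space, env.action_space)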
And I also intend to use the following wrappers for normalization:

class RunningMeanStd:
    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
    def __init__(self, epsilon=1e-4, shape=()):
        self.mean = np.zeros(shape, "float64")
        self.var = np.ones(shape, "float64")
        self.count = epsilon
    def update(self, x):
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]
        self.update_from_moments(batch_mean, batch_var, batch_count)

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        self.mean, self.var, self.count = update_mean_var_count_from_moments(
            self.mean, self.var, self.count, batch_mean, batch_var, batch_count
        )

def update_mean_var_count_from_moments(
    mean, var, count, batch_mean, batch_var, batch_count
):
    delta = batch_mean - mean
    tot_count = count + batch_count

    new_mean = mean + delta * batch_count / tot_count
    m_a = var * count
    m_b = batch_var * batch_count
    M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count
    new_var = M2 / tot_count
    new_count = tot_count

    return new_mean, new_var, new_count
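To make sure the running statistics behave as I expect, I test them in isolation like this (a small sketch with made-up data, independent of the environment):

rms = RunningMeanStd(shape=(4,))             # e.g. a 4-dimensional observation
batch = np.random.randn(32, 4) * 2.0 + 5.0   # 32 fake observations with mean ~5, std ~2
rms.update(batch)                            # folds the batch mean/var into the running stats
print(rms.mean, rms.var, rms.count)          # mean ~5, var ~4 once batches accumulate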
class NormalizeObservation(gym.core.Wrapper):
    def __init__(
        self,
        env,
        epsilon=1e-8,
    ):
        super().__init__(env)
        self.num_envs = getattr(env, "num_envs", 1)
        self.is_vector_env = getattr(env, "is_vector_env", False)
        if self.is_vector_env:
            self.obs_rms = RunningMeanStd(shape=self.single_observation_space.shape)
        else:
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        self.epsilon = epsilon
    def step(self, action):
        obs, rews, dones, infos = self.env.step(action)
        if self.is_vector_env:
            obs = self.normalize(obs)
        else:
            obs = self.normalize(np.array([obs]))[0]
        return obs, rews, dones, infos

    def reset(self, **kwargs):
        return_info = kwargs.get("return_info", False)
        if return_info:
            obs, info = self.env.reset(**kwargs)
        else:
            obs = self.env.reset(**kwargs)
        if self.is_vector_env:
            obs = self.normalize(obs)
        else:
            obs = self.normalize(np.array([obs]))[0]
        if not return_info:
            return obs
        else:
            return obs, info

    def normalize(self, obs):
        self.obs_rms.update(obs)
        return (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.epsilon)
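On its own, the observation wrapper would be used like this (only a sketch of the intended usage; I have not been able to verify it because of the error above):

env = NormalizeObservation(gym_ple.make("FlappyBird-v0"))
obs = env.reset()                            # should return a normalized observation
print(env.obs_rms.mean.shape)                # should match env.observation_space.shape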
class NormalizeReward(gym.core.Wrapper):
    def __init__(
        self,
        env,
        gamma=0.99,
        epsilon=1e-8,
    ):
        super().__init__(env)
        self.num_envs = getattr(env, "num_envs", 1)
        self.is_vector_env = getattr(env, "is_vector_env", False)
        self.return_rms = RunningMeanStd(shape=())
        self.returns = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
    def step(self, action):
        obs, rews, dones, infos = self.env.step(action)
        if not self.is_vector_env:
            rews = np.array([rews])
        self.returns = self.returns * self.gamma + rews
        rews = self.normalize(rews)
        self.returns[dones] = 0.0
        if not self.is_vector_env:
            rews = rews[0]
        return obs, rews, dones, infos

    def normalize(self, rews):
        self.return_rms.update(self.returns)
        return rews / np.sqrt(self.return_rms.var + self.epsilon)
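Once the environment itself works, I intend to stack the two wrappers like this (again only a sketch of the intended usage, assuming FlappyBird-v0 is registered as above):

env = gym_ple.make("FlappyBird-v0")
env = NormalizeObservation(env)              # whitens observations with running mean/std
env = NormalizeReward(env, gamma=0.99)       # scales rewards by the std of discounted returns
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())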
I also can't render: whenever I call env.render(), it throws the same NotImplementedError. Please help!
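For completeness, the rendering I am attempting looks roughly like this (a sketch of the intended loop, not code that currently runs for me):

env = gym_ple.make("FlappyBird-v0")
obs = env.reset()
for _ in range(100):
    env.render()                             # this raises the same NotImplementedError
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()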