gym icon indicating copy to clipboard operation
gym copied to clipboard

[Bug Report] module 'numpy' has no attribute 'bool8'

Open nocoding03 opened this issue 1 year ago • 2 comments

Describe the bug: `gym\utils\passive_env_checker.py` contains the check `if not isinstance(terminated, (bool, np.bool8)):`, but the installed NumPy fails with "module 'numpy' has no attribute 'bool8'".

# Code example
import gym
import torch
from torch.distributions import Categorical
from torch import nn, optim
import torch.nn.functional as F


def compute_policy_loss(n, log_p):
    """Return the policy loss for one episode.

    The per-step weights are the descending sequence ``n, n-1, ..., 1``,
    standardized to zero mean and unit standard deviation. The loss is the
    sum of ``-log_prob * weight`` over the paired entries.

    Args:
        n: Number of steps used to build the weight sequence.
        log_p: Iterable of per-step log-probability tensors. ``zip`` stops at
            the shorter of ``log_p`` and the ``n`` weights.

    Returns:
        The summed loss tensor, or the plain int ``0`` when ``log_p`` is empty.
    """
    # NOTE(review): for n == 1 the standard deviation of a single element is
    # presumably NaN, which would propagate into the loss -- confirm callers
    # never pass n == 1 together with a non-empty log_p.
    r = torch.tensor([float(i) for i in range(n, 0, -1)])
    r = (r - r.mean()) / r.std()
    return sum(-pi * ri for pi, ri in zip(log_p, r))


class CartPolePolicy(nn.Module):
    """Policy network: Linear(4 -> 128), dropout, ReLU, Linear(128 -> 2), softmax."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=128)
        self.fc2 = nn.Linear(128, 2)
        self.drop = nn.Dropout(p=0.6)

    def forward(self, x):
        """Map a batch of inputs to a probability distribution over 2 outputs.

        Softmax is taken over ``dim=1``, so ``x`` must have at least two
        dimensions; the visible caller passes a tensor of shape ``(1, 4)``.
        """
        x = self.fc1(x)
        x = self.drop(x)
        x = F.relu(x)
        x = self.fc2(x)
        return F.softmax(x, dim=1)

# Training script entry point.
# NOTE(review): the indentation below was reconstructed from a
# whitespace-collapsed paste; the nesting follows the only syntactically
# sensible reading, but confirm against the reporter's original file.
if __name__ == "__main__":
    env = gym.make("CartPole-v0")
    env.reset(seed=543)
    torch.manual_seed(543)

    policy = CartPolePolicy()
    optimizer = optim.Adam(policy.parameters(), lr=0.01)

    max_episod = 10000  # upper bound on the number of training episodes
    max_action = 10000  # upper bound on environment steps within one episode
    max_steps = 5000    # an episode that lasts longer than this ends training

    for episod in range(1, max_episod + 1):
        state, _ = env.reset()
        step = 0
        log_p = []
        for step in range(1, max_action + 1):
            state = torch.from_numpy(state).float().unsqueeze(0)
            probs = policy(state)
            m = Categorical(probs)
            action = m.sample()
            state, _, done, _, _ = env.step(action.item())
            if done:
                # The log-probability of the terminating action is not recorded.
                break
            log_p.append(m.log_prob(action))

        if step > max_steps:
            print(f"完成! 最后一回合 {episod} 运行步数 {step}")
            break

        # An episode that ends on its very first step records no
        # log-probabilities; compute_policy_loss then returns the plain int 0,
        # which has no .backward(). Skip the update in that case.
        if log_p:
            optimizer.zero_grad()
            loss = compute_policy_loss(step, log_p)
            loss.backward()
            optimizer.step()

        if episod % 10 == 0:
            print(f"回合数 {episod} 累计运行步数 {step}")

    # NOTE(review): placement assumed -- the collapsed paste does not show
    # whether the save ran once after training or inside the loop; confirm.
    torch.save(policy.state_dict(), "cartpole_policy.pth")

System Info — describe the characteristics of your environment:

  • gym installed via pip (`pip install gym`)
  • Windows 11

  • Python 3.12.3
  • [x] I have checked that there is no similar issue in the repo (required)

nocoding03 avatar Oct 10 '24 11:10 nocoding03

You should take a look at this: https://github.com/openai/gym/pull/3258#issuecomment-1961272377 @nocoding03

Sin-when avatar Oct 26 '24 11:10 Sin-when

这是来自QQ邮箱的假期自动回复邮件。你好,我最近正在休假中,无法亲自回复你的邮件。我将在假期结束后,尽快给你回复。

w1463442883 avatar Oct 26 '24 11:10 w1463442883