gym
gym copied to clipboard
[Bug Report] Unstable output for the state (aka observation) when using env.reset()
If you are submitting a bug report, please fill in the following details and use the tag [bug].
Describe the bug: I run my code and get a different output every time, so I just want an explanation of why the output differs from run to run.
Code example import numpy as np import gym import random import matplotlib.pyplot as plt from random import choice from tqdm.notebook import tqdm from kaggle_environments import evaluate, make
class ConnectX(gym.Env):
    """Gym-style wrapper around the Kaggle ``connectx`` environment.

    ``self.pair`` holds the two agent slots passed to ``env.train``; one is
    ``None`` (presumably the slot controlled through ``step``/``reset`` --
    confirm against the kaggle_environments docs) and the other is the
    built-in ``'negamax'`` opponent.
    """

    def __init__(self, switch_prob=0.5):
        """Build the environment, its trainer and the gym spaces.

        Args:
            switch_prob: Probability, evaluated on every ``reset()``, of
                reversing the order of the agent pair.
        """
        self.env = make('connectx', debug=True)
        self.pair = [None, 'negamax']
        self.trainer = self.env.train(self.pair)
        self.switch_prob = switch_prob

        # Define required gym fields (examples):
        config = self.env.configuration
        self.action_space = gym.spaces.Discrete(config.columns)
        self.observation_space = gym.spaces.Discrete(config.columns * config.rows)

    def switch_trainer(self):
        """Reverse the agent pair and rebuild the trainer for the new order."""
        self.pair = self.pair[::-1]
        self.trainer = self.env.train(self.pair)

    def step(self, action):
        """Forward ``action`` to the trainer and return its result unchanged."""
        return self.trainer.step(action)

    def reset(self):
        """Reset the trainer, first swapping the agent order at random.

        The swap uses the unseeded ``random`` module, so which slot the
        ``None`` agent occupies after a reset can differ between runs.
        """
        if random.uniform(0, 1) < self.switch_prob:
            self.switch_trainer()
        return self.trainer.reset()

    def render(self, **kwargs):
        """Render the wrapped environment, passing ``kwargs`` straight through."""
        return self.env.render(**kwargs)


class QTable:
    """Lazily populated Q-table keyed by an encoded board state."""

    def __init__(self, action_space):
        """Start with an empty table.

        Args:
            action_space: Object exposing ``n``, the number of actions.
        """
        self.table = dict()
        self.action_space = action_space

    def add_item(self, state_key):
        """Insert an all-zero row of Q-values for ``state_key``."""
        self.table[state_key] = list(np.zeros(self.action_space.n))

    def __call__(self, state):
        """Return the (mutable) row of Q-values for ``state``.

        The key is the board cells followed by the player's mark, read as
        one base-3 number and rendered as hex without the ``0x`` prefix.
        A missing row is created on first access.
        """
        board = state['board'][:]  # Get a copy
        board.append(state.mark)
        state_key = np.array(board).astype(str)
        state_key = hex(int(''.join(state_key), 3))[2:]
        if state_key not in self.table:
            self.add_item(state_key)

        return self.table[state_key]
env = ConnectX()

# Q-learning hyperparameters.
alpha = 0.1
gamma = 0.6
epsilon = 0.99
min_epsilon = 0.1

episodes = 10000

# Decay schedule for the learning rate and the exploration rate.
alpha_decay_step = 1000
alpha_decay_rate = 0.9
epsilon_decay_rate = 0.9999

q_table = QTable(env.action_space)

# Per-episode statistics.
all_epochs = []
all_total_rewards = []
all_avg_rewards = []  # Last 100 steps
all_qtable_rows = []
all_epsilons = []

state = env.reset()
# Attribute access on the observation is the line under investigation:
# it prints the board on some runs and raises AttributeError on others.
print(state.board)  # this is the tricky part
Output
/bin/python /home/gadour/Music/beginig.py [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' Traceback (most recent call last): File "/home/gadour/Music/beginig.py", line 78, in print(state.board) ^^^^^^^^^^^ AttributeError: 'Struct' object has no attribute 'board' [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' [0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' Traceback (most recent call last): File "/home/gadour/Music/beginig.py", line 78, in print(state.board) ^^^^^^^^^^^ AttributeError: 'Struct' object has no attribute 'board' [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' Traceback (most recent call last): File "/home/gadour/Music/beginig.py", line 78, in print(state.board) ^^^^^^^^^^^ AttributeError: 'Struct' object has no attribute 'board' [gadour@fedora ns-3.37]$ /bin/python /home/gadour/Music/beginig.py Loading environment lux_ai_s2 failed: No module named 'vec_noise' Traceback (most recent call last): File "/home/gadour/Music/beginig.py", line 78, in print(state.board)
System Info: Fedora Linux 37; gym installed with `pip install gym`; Python versions 3.11.1 and 3.10.9.
My Question
So guys, why the hell does it sometimes return the list and sometimes just give me an error saying 'Struct' object has no attribute 'board'? Why does that happen? Please, I want a logical explanation, and thanks.
Note: I know that if I use state['board'] it works better, but I want to know why the output is unstable. It's 04:48pm now and I can't sleep for thinking about it, so please, I'm so excited to know why it behaves like that.
I can't reproduce your issue. Please include all of the necessary Python modules and properly indented code.
OK @pseudo-rnd-thoughts, this is the full code, and thank you.
import numpy as np import gym import random import matplotlib.pyplot as plt from random import choice from tqdm.notebook import tqdm from kaggle_environments import evaluate, make
class ConnectX(gym.Env):
    """Gym-style wrapper around the Kaggle ``connectx`` environment.

    ``self.pair`` holds the two agent slots passed to ``env.train``; one is
    ``None`` (presumably the slot controlled through ``step``/``reset`` --
    confirm against the kaggle_environments docs) and the other is the
    built-in ``'negamax'`` opponent.
    """

    def __init__(self, switch_prob=0.5):
        """Build the environment, its trainer and the gym spaces.

        Args:
            switch_prob: Probability, evaluated on every ``reset()``, of
                reversing the order of the agent pair.
        """
        self.env = make('connectx', debug=True)
        self.pair = [None, 'negamax']
        self.trainer = self.env.train(self.pair)
        self.switch_prob = switch_prob

        # Define required gym fields (examples):
        config = self.env.configuration
        self.action_space = gym.spaces.Discrete(config.columns)
        self.observation_space = gym.spaces.Discrete(config.columns * config.rows)

    def switch_trainer(self):
        """Reverse the agent pair and rebuild the trainer for the new order."""
        self.pair = self.pair[::-1]
        self.trainer = self.env.train(self.pair)

    def step(self, action):
        """Forward ``action`` to the trainer and return its result unchanged."""
        return self.trainer.step(action)

    def reset(self):
        """Reset the trainer, first swapping the agent order at random.

        The swap uses the unseeded ``random`` module, so which slot the
        ``None`` agent occupies after a reset can differ between runs.
        """
        if random.uniform(0, 1) < self.switch_prob:
            self.switch_trainer()
        return self.trainer.reset()

    def render(self, **kwargs):
        """Render the wrapped environment, passing ``kwargs`` straight through."""
        return self.env.render(**kwargs)
class QTable:
    """Lazily populated Q-table keyed by an encoded board state."""

    def __init__(self, action_space):
        """Start with an empty table.

        Args:
            action_space: Object exposing ``n``, the number of actions.
        """
        self.table = dict()
        self.action_space = action_space

    def add_item(self, state_key):
        """Insert an all-zero row of Q-values for ``state_key``."""
        self.table[state_key] = list(np.zeros(self.action_space.n))

    def __call__(self, state):
        """Return the (mutable) row of Q-values for ``state``.

        The key is the board cells followed by the player's mark, read as
        one base-3 number and rendered as hex without the ``0x`` prefix.
        A missing row is created on first access.

        Args:
            state: Mapping with a ``'board'`` list and a ``'mark'`` value.
                Fields are read by key rather than by attribute, because
                attribute access on the observation intermittently raises
                ``AttributeError: 'Struct' object has no attribute 'board'``.
        """
        board = state['board'][:]  # Copy so the caller's board is not mutated
        board.append(state['mark'])
        state_key = np.array(board).astype(str)
        state_key = hex(int(''.join(state_key), 3))[2:]
        if state_key not in self.table:
            self.add_item(state_key)
        return self.table[state_key]
env = ConnectX()

# Q-learning hyperparameters. They must be defined before the loop below,
# which reads every one of them.
alpha = 0.1
gamma = 0.6
epsilon = 0.99
min_epsilon = 0.1

episodes = 10000

# Decay schedule for the learning rate and the exploration rate.
alpha_decay_step = 1000
alpha_decay_rate = 0.9
epsilon_decay_rate = 0.9999

q_table = QTable(env.action_space)

# Per-episode statistics.
all_epochs = []
all_total_rewards = []
all_avg_rewards = []  # Last 100 steps
all_qtable_rows = []
all_epsilons = []

for i in tqdm(range(episodes)):
    state = env.reset()

    epsilon = max(min_epsilon, epsilon * epsilon_decay_rate)
    epochs, total_rewards = 0, 0
    done = False

    while not done:
        # Read the board by key: attribute access (state.board) raises
        # AttributeError on some of the observations returned by reset().
        board = state['board']
        n_actions = env.action_space.n

        if random.uniform(0, 1) < epsilon:
            # Explore: pick uniformly among columns whose top cell is empty.
            action = choice([c for c in range(n_actions) if board[c] == 0])
        else:
            # Exploit: best Q-value among playable columns; full columns are
            # masked with a very low value so argmax never selects them.
            row = q_table(state)[:]
            selected_items = [
                row[j] if board[j] == 0 else -1e7 for j in range(n_actions)
            ]
            action = int(np.argmax(selected_items))

        next_state, reward, done, info = env.step(action)

        # Apply new rules
        # NOTE(review): the Won/Lost/Draw labels assume the environment
        # reports 1 for a win and 0 for a loss -- confirm against the
        # installed kaggle_environments version.
        if done:
            if reward == 1:  # Won
                reward = 20
            elif reward == 0:  # Lost
                reward = -20
            else:  # Draw
                reward = 10
        else:
            reward = -0.05  # Try to prevent the agent from taking a long move

        old_value = q_table(state)[action]
        next_max = np.max(q_table(next_state))

        # Update Q-value
        new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
        q_table(state)[action] = new_value

        state = next_state
        epochs += 1
        total_rewards += reward

    all_epochs.append(epochs)
    all_total_rewards.append(total_rewards)
    # NOTE(review): this slice spans up to 101 episodes, not 100.
    avg_rewards = np.mean(all_total_rewards[max(0, i-100):(i+1)])
    all_avg_rewards.append(avg_rewards)
    all_qtable_rows.append(len(q_table.table))
    all_epsilons.append(epsilon)

    if (i+1) % alpha_decay_step == 0:
        alpha *= alpha_decay_rate
###################################################################################### I use Python 3.10.9 in conda; only kaggle_environments was installed using pip.
@pseudo-rnd-thoughts @Abdelkader-gnichi It may be because you're not using a fixed seed for the environment. This results in the environment restarting to a new state every time.
@jjshoots I will check it, and thank you.