GameZero.jl
New Bug
I tried the code twice and it threw an error both times. It also sometimes gets stuck (when I'm not drawing objects it can stay stuck for a long time), and it's much slower than pygame, although I expected it to be much faster (unless I've made a mistake in my code).
It may take a few hours to hit the error, usually between 3000 and 4000 episodes.
using Flux
using Zygote
using Flux: gradient, params, ADAM
using Flux.Losses: huber_loss
using Gym
using Distributions: Categorical, logpdf
using Statistics: mean, std
# ------------------------ MEMORY ---------------------------
mutable struct Memory
states::Vector{Vector{Float32}}
actions::Vector{Int}
rewards::Vector{Float32}
dones::Vector{Bool}
log_probs::Vector{Float32}
end
function vector2matrix(input_vec::Vector{Vector{Float32}})
nrows = length(input_vec)
ncols = length(input_vec[1])
matrix = zeros(Float32, ncols, nrows)
for i ∈ 1:nrows
matrix[:, i] = input_vec[i]
end
matrix
end
function matrix2vector(input_matrix)
vector = Array{Vector{Float32}, 1}()
for i ∈ 1:size(input_matrix, 2)
push!(vector, input_matrix[:, i])
end
vector
end
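# Note: for non-empty input, vector2matrix(input_vec) is equivalent to reduce(hcat, input_vec),
# stacking each state as one column.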
function clear_memory(m::Memory)
m.states = []
m.actions = []
m.rewards = []
m.dones = []
m.log_probs = []
end
# ------------------------- model --------------------------------
function build_networks(input_dim, output_dim, hidden_dim)
actor = Chain(
Dense(input_dim, hidden_dim, tanh),
Dense(hidden_dim, hidden_dim, tanh),
Dense(hidden_dim, output_dim),
softmax
)
critic = Chain(
Dense(input_dim, hidden_dim, tanh),
Dense(hidden_dim, hidden_dim, tanh),
Dense(hidden_dim, 1)
)
actor, critic
end
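# Illustrative shape check (dimensions here are arbitrary):
# actor, critic = build_networks(4, 2, 16)
# actor(rand(Float32, 4))  # 2-element probability vector (softmax head)
# critic(rand(Float32, 4)) # 1-element value estimate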
mutable struct Model
actor::Chain
critic::Chain
end
Flux.@functor Model
# --------------------------- agent -----------------------------
mutable struct PPO
old_policy::Model
policy::Model
gamma::Float32
update_every::Int
update_step::Int
opt
memory::Memory
eps_clip::Float32
k_epochs::Int
state_values::Vector{Float32}
log_probs::Vector{Float32}
end
function PPO(state_dim::Int, action_dim::Int, hidden_dim::Int, lr, gamma, k_epochs, eps_clip, update_every, memory)
println("state_dim = $state_dim, action_dim = $action_dim, hidden_dim = $hidden_dim")
actor, critic = build_networks(state_dim, action_dim, hidden_dim)
old_policy = Model(deepcopy(actor), deepcopy(critic)) # deepcopy so old and current policies do not share parameters
policy = Model(actor, critic)
opt = ADAM(lr)
update_step = 0
state_values = [0.0]
log_probs = [0.0]
PPO(
old_policy,
policy,
gamma,
update_every,
update_step,
opt,
memory,
eps_clip,
k_epochs,
state_values,
log_probs
)
end
function select_action(agent::PPO, state::Vector{Float32}, train_mode::Bool)
probs = agent.old_policy.actor(state)
dist = Categorical(probs)
action = rand(dist)
if train_mode
log_prob = logpdf(dist, action)
push!(agent.memory.states, state)
push!(agent.memory.actions, action)
push!(agent.memory.log_probs, log_prob)
end
action
end
function take_step(agent::PPO, reward, done)
push!(agent.memory.rewards, reward)
push!(agent.memory.dones, done)
agent.update_step += 1
if agent.update_step % agent.update_every == 0
train(agent)
clear_memory(agent.memory)
end
end
function update_target!(target, model; τ = 1f0)
for (p_t, p_m) in zip(params(target), params(model))
p_t .= (1f0 - τ) * p_t .+ τ * p_m
end
end
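# With the default τ = 1f0 this is a hard parameter copy into the target;
# a smaller τ would give a Polyak-style soft update.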
function train(agent::PPO)
states = vector2matrix(agent.memory.states)
rewards = Array{Float32, 1}()
discounted_reward = 0.0
for (reward, done) ∈ zip(reverse(agent.memory.rewards), reverse(agent.memory.dones))
if done
discounted_reward = 0.0
end
discounted_reward = reward + agent.gamma * discounted_reward
insert!(rewards, 1, discounted_reward)
end
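# The loop above builds discounted returns back to front, G_t = r_t + γ·G_{t+1},
# restarting the accumulator at each episode boundary (done flag).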
rewards = (rewards .- mean(rewards)) ./ (std(rewards) .+ 1e-5)
for _ ∈ 1:agent.k_epochs
gs = gradient(params(agent.policy.critic)) do
state_values = vec(agent.policy.critic(states))
Zygote.ignore() do
agent.state_values = state_values
end
huber_loss(state_values, rewards)
end
Flux.Optimise.update!(agent.opt, params(agent.policy.critic), gs)
gs = gradient(params(agent.policy.actor)) do
advantages = rewards .- agent.state_values
probs = agent.policy.actor(states)
probs_lst = [probs[:, i] for i ∈ 1:size(probs, 2)]
dist = [Categorical(x) for x ∈ probs_lst]
entropies = map(x -> -sum(x .* log.(x)), probs_lst) # categorical entropy; currently unused in the loss
log_probs = logpdf.(dist, agent.memory.actions)
ratio = exp.(log_probs .- agent.memory.log_probs)
surr1 = ratio .* advantages
surr2 = clamp.(ratio, 1 - agent.eps_clip, 1 + agent.eps_clip) .* advantages
loss = mean(-min.(surr1, surr2))
loss
end
Flux.Optimise.update!(agent.opt, params(agent.policy.actor), gs)
end
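# Each epoch fits the critic to the normalized returns with a Huber loss, then
# ascends the PPO clipped surrogate L = E[min(r_t·Â_t, clip(r_t, 1-ε, 1+ε)·Â_t)],
# where r_t = exp(log π(a_t|s_t) - log π_old(a_t|s_t)).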
update_target!(agent.old_policy.critic, agent.policy.critic)
update_target!(agent.old_policy.actor, agent.policy.actor)
end
# ----------------------------------- train -----------------------------------
function train_loop()
env = GymEnv("CartPole-v1")
state_dim = env.observation_space.shape[1]
action_dim = env.action_space.n
memory = Memory([], [], [], [], [])
agent = PPO(state_dim, action_dim, 128, 0.001, 0.99, 4, 0.2, 2000, memory)
total_reward = 0
for episode ∈ 1:100000
state = reset!(env)
for _ ∈ 1:2000
action = select_action(agent, state, true)
state, reward, done, _ = step!(env, action - 1)
total_reward += reward
take_step(agent, reward, done)
if done
break
end
end
if episode % 20 == 0
avg_reward = total_reward / 20
total_reward = 0
@info "Episode : $episode | avg_reward : $avg_reward"
end
end
end
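# Entry point for the CartPole experiment; call train_loop() to run it.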
# -------------------------------------------- GAME ------------------------------------------------------------------------
using GameZero
using Colors
WIDTH = 400
HEIGHT = 400
WHITE = colorant"white"
BLUE = colorant"blue"
RED = colorant"red"
YELLOW = colorant"yellow"
BACKGROUND = WHITE
initial_x = 200
initial_y = 200
BLOCKSIZE = 20
mutable struct SnakeEnv
SCORE::Int
DIRECTION::String
done::Bool
state::Vector{Float32}
reward::Float32
allow_render::Bool
snake::Vector{Rect}
food::Rect
step::Int
use_first_state::Bool
txt
end
directions_lst = ["right", "left", "up", "down"]
function place_food(snake)
food_x = rand(1:WIDTH - BLOCKSIZE)
food_y = rand(1:HEIGHT - BLOCKSIZE)
food_obj = Rect(food_x, food_y, BLOCKSIZE, BLOCKSIZE)
if any(map(x -> collide(x, food_obj), snake))
return place_food(snake) # retry; the original discarded the recursive result and returned the colliding rect
end
food_obj
end
function get_state_1(env::SnakeEnv)
danger_right_1, danger_left_1, danger_up_1, danger_down_1 = check_danger(env, 1)
danger_right_2, danger_left_2, danger_up_2, danger_down_2 = check_danger(env, 2)
danger_right_3, danger_left_3, danger_up_3, danger_down_3 = check_danger(env, 3)
danger_right_4, danger_left_4, danger_up_4, danger_down_4 = check_danger(env, 4)
state = [danger_right_1, danger_left_1, danger_up_1, danger_down_1,
danger_right_2, danger_left_2, danger_up_2, danger_down_2,
danger_right_3, danger_left_3, danger_up_3, danger_down_3,
danger_right_4, danger_left_4, danger_up_4, danger_down_4]
head = env.snake[1]
tail = env.snake[end]
food = env.food
head.x > food.x ? push!(state, 1) : push!(state, 0)
head.y > food.y ? push!(state, 1) : push!(state, 0)
tail.x > food.x ? push!(state, 1) : push!(state, 0)
tail.y > food.y ? push!(state, 1) : push!(state, 0)
head.x > tail.x ? push!(state, 1) : push!(state, 0)
head.y > tail.y ? push!(state, 1) : push!(state, 0)
# add directions
for dir ∈ directions_lst
dir == env.DIRECTION ? push!(state, 1.0) : push!(state, 0.0)
end
env.state = state
end
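# get_state_1 returns a 26-element binary vector: 16 danger flags (4 directions ×
# 4 look-ahead step sizes), 6 relative-position flags, and a 4-way direction one-hot.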
function get_state_2(env::SnakeEnv)
danger_right_1, danger_left_1, danger_up_1, danger_down_1 = check_danger(env, 1)
head = env.snake[1]
mid = env.snake[Int(round(length(env.snake) / 2))]
tail = env.snake[end]
food = env.food
dir = findfirst(==(env.DIRECTION), directions_lst)
state = [ mid.x / WIDTH, mid.y / HEIGHT, length(env.snake) / WIDTH,
head.x / WIDTH, head.y / HEIGHT, tail.x / WIDTH, tail.y / HEIGHT,
(head.x - food.x) / WIDTH, (head.y - food.y) / HEIGHT, (tail.x - food.x) / WIDTH, (tail.y - food.y) / HEIGHT,
food.x / WIDTH, food.y / HEIGHT,
dir / 4]
env.state = state
end
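# get_state_2 returns a 14-element vector of normalized features: mid/head/tail
# coordinates, head-to-food and tail-to-food offsets, food position, snake length,
# and the heading index scaled into (0, 1].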
function reset(env::SnakeEnv)
env.snake = [Rect(initial_x, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - 2 * BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE)]
env.food = place_food(env.snake)
env.done = false
env.reward = 0
env.step = 1
env.SCORE = 0
env.DIRECTION = "right"
env.txt = TextActor("Score : $(env.SCORE)", "moonhouse")
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
end
function draw_objects(env::SnakeEnv)
head = env.snake[1]
body = env.snake[2:end]
draw(head, YELLOW, fill=true)
map(x -> draw(x, BLUE), body)
draw(env.food, RED, fill=true)
draw(env.txt)
end
function game_over(env::SnakeEnv)
head = env.snake[1]
if head.x > WIDTH - BLOCKSIZE || head.x < 0 || head.y > HEIGHT - BLOCKSIZE || head.y < 0
return true, -10
end
if env.step > 30 * length(env.snake)
return true, -10
end
if any(map(x -> collide(x, head), env.snake[3:end]))
return true, -100
end
return false, 0
end
function check_danger(env::SnakeEnv, step_size::Int)
# only flags wall danger along the current heading; body segments are not checked here
head = env.snake[1]
danger_right = env.DIRECTION == "right" && (head.x + step_size * BLOCKSIZE > WIDTH - BLOCKSIZE) ? 1.0 : 0.0
danger_left = env.DIRECTION == "left" && (head.x - step_size * BLOCKSIZE) < 0 ? 1.0 : 0.0
danger_up = env.DIRECTION == "up" && (head.y - step_size * BLOCKSIZE) < 0 ? 1.0 : 0.0
danger_down = env.DIRECTION == "down" && (head.y + step_size * BLOCKSIZE) > HEIGHT - BLOCKSIZE ? 1.0 : 0.0
danger_right, danger_left, danger_up, danger_down
end
function move_snake(env::SnakeEnv)
h_x = env.snake[1].x
h_y = env.snake[1].y
if env.DIRECTION == "right"
insert!(env.snake, 1, Rect(h_x + BLOCKSIZE, h_y, BLOCKSIZE, BLOCKSIZE))
elseif env.DIRECTION == "left"
insert!(env.snake, 1, Rect(h_x - BLOCKSIZE, h_y, BLOCKSIZE, BLOCKSIZE))
elseif env.DIRECTION == "up"
insert!(env.snake, 1, Rect(h_x, h_y - BLOCKSIZE, BLOCKSIZE, BLOCKSIZE))
else
insert!(env.snake, 1, Rect(h_x, h_y + BLOCKSIZE, BLOCKSIZE, BLOCKSIZE))
end
end
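# move_snake grows the snake by inserting a new head rect; step() pops the tail
# afterwards unless food was eaten, so net length only changes on food.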
function step(env::SnakeEnv, action::Int)
env.step += 1
if action == 1 && env.DIRECTION != "up"
env.DIRECTION = "down"
elseif action == 2 && env.DIRECTION != "down"
env.DIRECTION = "up"
elseif action == 3 && env.DIRECTION != "left"
env.DIRECTION = "right"
elseif action == 4 && env.DIRECTION != "right"
env.DIRECTION = "left"
end
# update snake position
move_snake(env)
# check game over
env.done, env.reward = game_over(env)
if env.done
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
return env.state, env.reward, env.done, env.SCORE
end
if collide(env.snake[1], env.food)
env.SCORE += 1
env.food = place_food(env.snake)
env.reward = 10
else
pop!(env.snake)
end
env.txt = TextActor("Score : $(env.SCORE)", "moonhouse")
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
return env.state, env.reward, env.done, env.SCORE
end
# -------------------- CREATE ENV ---------------------------------
snake_env = [Rect(initial_x, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - 2 * BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE)]
food_env = place_food(snake_env)
env = SnakeEnv(0, "right", false, [0.0], 0.0, true, snake_env, food_env, 1, true, TextActor("Score : 0", "moonhouse"))
reset(env)
state_dim = length(env.state)
action_dim = 4
memory = Memory([], [], [], [], [])
agent = PPO(state_dim, action_dim, 128, 0.001, 0.99, 4, 0.2, 2000, memory)
function draw(g::Game)
if env.allow_render
sleep(0.03)
draw_objects(env)
end
end
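# Note: sleep(0.03) blocks GameZero's single game loop, so updates pause while
# rendering is enabled; this may account for some of the perceived slowness.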
# ---------------------------- loop ------------------------------
episode = 0
total_reward = 0
record = 0
function print_info(episode, total_reward, record)
env.allow_render = true
avg_reward = total_reward / 20
println("Episode : $episode | avg_reward : $avg_reward | record : $record")
end
function onetime_loop()
action = select_action(agent, env.state, false)
next_state, reward, done, SCORE = step(env, action)
if done
env.allow_render = false
reset(env)
end
end
function full_loop()
global episode, total_reward, record
while !env.done
action = select_action(agent, env.state, true)
next_state, reward, done, SCORE = step(env, action)
take_step(agent, reward, done)
total_reward += reward
end
episode += 1
if episode % 20 == 0
print_info(episode, total_reward, record)
total_reward = 0
else
env.allow_render = false
end
if env.SCORE > record
record = env.SCORE
end
reset(env)
end
function update(g::Game)
env.allow_render ? onetime_loop() : full_loop()
end
Please submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.

Exception: EXCEPTION_ACCESS_VIOLATION at 0x6d642194 -- TTF_SizeUTF8_Internal at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1185
in expression starting at REPL[2]:1
TTF_SizeUTF8_Internal at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1185
TTF_RenderUTF8_Blended at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1630
TTF_RenderText_Blended at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1600
TTF_RenderText_Blended at C:\Users\Administrator\.julia\packages\SimpleDirectMediaLayer\wjMsP\src\LibSDL2.jl:6038 [inlined]
#TextActor#12 at C:\Users\Administrator\.julia\packages\GameZero\q74y7\src\actor.jl:35
TextActor at C:\Users\Administrator\.julia\packages\GameZero\q74y7\src\actor.jl:33 [inlined]
step at C:\Users\Administrator\Desktop\julia\snake.jl:445
full_loop at C:\Users\Administrator\Desktop\julia\snake.jl:515
update at C:\Users\Administrator\Desktop\julia\snake.jl:537
unknown function (ip: 0000022c242fc776)

Please submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.

Exception: UNKNOWN at 0x7ffdd3cc49b9 --
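For what it's worth, the trace points at the TextActor constructor called from step (snake.jl:445), so the fault is probably triggered by rebuilding a TextActor on every step. Here is a minimal sketch that exercises just that pattern (the font name comes from the code above; everything else is illustrative) and might reproduce the crash faster:

using GameZero
using Colors

WIDTH = 200
HEIGHT = 200
BACKGROUND = colorant"white"

frame = 0
txt = TextActor("frame : 0", "moonhouse")

function update(g::Game)
global frame, txt
frame += 1
# a fresh SDL_ttf render every tick, mirroring what step() does in snake.jl
txt = TextActor("frame : $frame", "moonhouse")
end

function draw(g::Game)
draw(txt)
end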
I tried replicating this, but installing Gym fails on my machine 😢 How did you install Gym? You're on Windows, I presume.