GameZero.jl
New Bug
I tried the code twice and it threw an error both times. It also sometimes gets stuck (when I'm not drawing objects it can stay stuck for a long time), and it's much slower than pygame, although I expected it to be much faster (unless I've made a mistake in my code).
It may take a few hours to hit the error, usually between 3000 and 4000 episodes.
using Flux
using Zygote
using Flux: gradient, params, ADAM
using Flux.Losses: huber_loss
using Gym
using Distributions: Categorical, logpdf
using Statistics: mean, std
# ------------------------ MEMORY ---------------------------
mutable struct Memory
states::Vector{Vector{Float32}}
actions::Vector{Int}
rewards::Vector{Float32}
dones::Vector{Bool}
log_probs::Vector{Float32}
end
function vector2matrix(input_vec::Vector{Vector{Float32}})
nrows = length(input_vec)
ncols = length(input_vec[1])
matrix = zeros(Float32, ncols, nrows)
for i ∈ 1:nrows
matrix[:, i] = input_vec[i]
end
matrix
end
function matrix2vector(input_matrix)
vector = Array{Vector{Float32}, 1}()
for i ∈ 1:size(input_matrix, 2)
push!(vector, input_matrix[:, i])
end
vector
end
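# Note: for non-empty input, vector2matrix(input_vec) is equivalent to reduce(hcat, input_vec),
# stacking each state as one column.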
function clear_memory(m::Memory)
m.states = []
m.actions = []
m.rewards = []
m.dones = []
m.log_probs = []
end
# ------------------------- model --------------------------------
function build_networks(input_dim, output_dim, hidden_dim)
actor = Chain(
Dense(input_dim, hidden_dim, tanh),
Dense(hidden_dim, hidden_dim, tanh),
Dense(hidden_dim, output_dim),
softmax
)
critic = Chain(
Dense(input_dim, hidden_dim, tanh),
Dense(hidden_dim, hidden_dim, tanh),
Dense(hidden_dim, 1)
)
actor, critic
end
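# Illustrative shape check (dimensions here are arbitrary):
# actor, critic = build_networks(4, 2, 16)
# actor(rand(Float32, 4))  # 2-element probability vector (softmax head)
# critic(rand(Float32, 4)) # 1-element value estimate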
mutable struct Model
actor::Chain
critic::Chain
end
Flux.@functor Model
# --------------------------- agent -----------------------------
mutable struct PPO
old_policy::Model
policy::Model
gamma::Float32
update_every::Int
update_step::Int
opt
memory::Memory
eps_clip::Float32
k_epochs::Int
state_values::Vector{Float32}
log_probs::Vector{Float32}
end
function PPO(state_dim::Int, action_dim::Int, hidden_dim::Int, lr, gamma, k_epochs, eps_clip, update_every, memory)
println("state_dim = $state_dim, action_dim = $action_dim, hidden_dim = $hidden_dim")
actor, critic = build_networks(state_dim, action_dim, hidden_dim)
old_policy = Model(deepcopy(actor), deepcopy(critic)) # deepcopy so old and current policies do not share parameters
policy = Model(actor, critic)
opt = ADAM(lr)
update_step = 0
state_values = [0.0]
log_probs = [0.0]
PPO(
old_policy,
policy,
gamma,
update_every,
update_step,
opt,
memory,
eps_clip,
k_epochs,
state_values,
log_probs
)
end
function select_action(agent::PPO, state::Vector{Float32}, train_mode::Bool)
probs = agent.old_policy.actor(state)
dist = Categorical(probs)
action = rand(dist)
if train_mode
log_prob = logpdf(dist, action)
push!(agent.memory.states, state)
push!(agent.memory.actions, action)
push!(agent.memory.log_probs, log_prob)
end
action
end
function take_step(agent::PPO, reward, done)
push!(agent.memory.rewards, reward)
push!(agent.memory.dones, done)
agent.update_step += 1
if agent.update_step % agent.update_every == 0
train(agent)
clear_memory(agent.memory)
end
end
function update_target!(target, model; τ = 1f0)
for (p_t, p_m) in zip(params(target), params(model))
p_t .= (1f0 - τ) * p_t .+ τ * p_m
end
end
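# With the default τ = 1f0 this is a hard parameter copy into the target;
# a smaller τ would give a Polyak-style soft update.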
function train(agent::PPO)
states = vector2matrix(agent.memory.states)
rewards = Array{Float32, 1}()
discounted_reward = 0.0
for (reward, done) ∈ zip(reverse(agent.memory.rewards), reverse(agent.memory.dones))
if done
discounted_reward = 0.0
end
discounted_reward = reward + agent.gamma * discounted_reward
insert!(rewards, 1, discounted_reward)
end
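# The loop above builds discounted returns back to front, G_t = r_t + γ·G_{t+1},
# restarting the accumulator at each episode boundary (done flag).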
rewards = (rewards .- mean(rewards)) ./ (std(rewards) .+ 1e-5)
for _ ∈ 1:agent.k_epochs
gs = gradient(params(agent.policy.critic)) do
state_values = vec(agent.policy.critic(states))
Zygote.ignore() do
agent.state_values = state_values
end
huber_loss(state_values, rewards)
end
Flux.Optimise.update!(agent.opt, params(agent.policy.critic), gs)
gs = gradient(params(agent.policy.actor)) do
advantages = rewards .- agent.state_values
probs = agent.policy.actor(states)
probs_lst = [probs[:, i] for i ∈ 1:size(probs, 2)]
dist = [Categorical(x) for x ∈ probs_lst]
entropies = map(x -> -sum(x .* log.(x)), probs_lst) # categorical entropy; currently unused in the loss
log_probs = logpdf.(dist, agent.memory.actions)
ratio = exp.(log_probs .- agent.memory.log_probs)
surr1 = ratio .* advantages
surr2 = clamp.(ratio, 1 - agent.eps_clip, 1 + agent.eps_clip) .* advantages
loss = mean(-min.(surr1, surr2))
loss
end
Flux.Optimise.update!(agent.opt, params(agent.policy.actor), gs)
end
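# Each epoch fits the critic to the normalized returns with a Huber loss, then
# ascends the PPO clipped surrogate L = E[min(r_t·Â_t, clip(r_t, 1-ε, 1+ε)·Â_t)],
# where r_t = exp(log π(a_t|s_t) - log π_old(a_t|s_t)).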
update_target!(agent.old_policy.critic, agent.policy.critic)
update_target!(agent.old_policy.actor, agent.policy.actor)
end
# ----------------------------------- train -----------------------------------
function train_loop()
env = GymEnv("CartPole-v1")
state_dim = env.observation_space.shape[1]
action_dim = env.action_space.n
memory = Memory([], [], [], [], [])
agent = PPO(state_dim, action_dim, 128, 0.001, 0.99, 4, 0.2, 2000, memory)
total_reward = 0
for episode ∈ 1:100000
state = reset!(env)
for _ ∈ 1:2000
action = select_action(agent, state, true)
state, reward, done, _ = step!(env, action - 1)
total_reward += reward
take_step(agent, reward, done)
if done
break
end
end
if episode % 20 == 0
avg_reward = total_reward / 20
total_reward = 0
@info "Episode : $episode | avg_reward : $avg_reward"
end
end
end
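# Entry point for the CartPole experiment; call train_loop() to run it.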
# -------------------------------------------- GAME ------------------------------------------------------------------------
using GameZero
using Colors
WIDTH = 400
HEIGHT = 400
WHITE = colorant"white"
BLUE = colorant"blue"
RED = colorant"red"
YELLOW = colorant"yellow"
BACKGROUND = WHITE
initial_x = 200
initial_y = 200
BLOCKSIZE = 20
mutable struct SnakeEnv
SCORE::Int
DIRECTION::String
done::Bool
state::Vector{Float32}
reward::Float32
allow_render::Bool
snake::Vector{Rect}
food::Rect
step::Int
use_first_state::Bool
txt
end
directions_lst = ["right", "left", "up", "down"]
function place_food(snake)
food_x = rand(1:WIDTH - BLOCKSIZE)
food_y = rand(1:HEIGHT - BLOCKSIZE)
food_obj = Rect(food_x, food_y, BLOCKSIZE, BLOCKSIZE)
if any(map(x -> collide(x, food_obj), snake))
return place_food(snake) # retry; the original discarded the recursive result and returned the colliding rect
end
food_obj
end
function get_state_1(env::SnakeEnv)
danger_right_1, danger_left_1, danger_up_1, danger_down_1 = check_danger(env, 1)
danger_right_2, danger_left_2, danger_up_2, danger_down_2 = check_danger(env, 2)
danger_right_3, danger_left_3, danger_up_3, danger_down_3 = check_danger(env, 3)
danger_right_4, danger_left_4, danger_up_4, danger_down_4 = check_danger(env, 4)
state = [danger_right_1, danger_left_1, danger_up_1, danger_down_1,
danger_right_2, danger_left_2, danger_up_2, danger_down_2,
danger_right_3, danger_left_3, danger_up_3, danger_down_3,
danger_right_4, danger_left_4, danger_up_4, danger_down_4]
head = env.snake[1]
tail = env.snake[end]
food = env.food
head.x > food.x ? push!(state, 1) : push!(state, 0)
head.y > food.y ? push!(state, 1) : push!(state, 0)
tail.x > food.x ? push!(state, 1) : push!(state, 0)
tail.y > food.y ? push!(state, 1) : push!(state, 0)
head.x > tail.x ? push!(state, 1) : push!(state, 0)
head.y > tail.y ? push!(state, 1) : push!(state, 0)
# add directions
for dir ∈ directions_lst
dir == env.DIRECTION ? push!(state, 1.0) : push!(state, 0.0)
end
env.state = state
end
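# get_state_1 returns a 26-element binary vector: 16 danger flags (4 directions ×
# 4 look-ahead step sizes), 6 relative-position flags, and a 4-way direction one-hot.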
function get_state_2(env::SnakeEnv)
danger_right_1, danger_left_1, danger_up_1, danger_down_1 = check_danger(env, 1)
head = env.snake[1]
mid = env.snake[Int(round(length(env.snake) / 2))]
tail = env.snake[end]
food = env.food
dir = findfirst(==(env.DIRECTION), directions_lst)
state = [ mid.x / WIDTH, mid.y / HEIGHT, length(env.snake) / WIDTH,
head.x / WIDTH, head.y / HEIGHT, tail.x / WIDTH, tail.y / HEIGHT,
(head.x - food.x) / WIDTH, (head.y - food.y) / HEIGHT, (tail.x - food.x) / WIDTH, (tail.y - food.y) / HEIGHT,
food.x / WIDTH, food.y / HEIGHT,
dir / 4]
env.state = state
end
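# get_state_2 returns a 14-element vector of normalized features: mid/head/tail
# coordinates, head-to-food and tail-to-food offsets, food position, snake length,
# and the heading index scaled into (0, 1].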
function reset(env::SnakeEnv)
env.snake = [Rect(initial_x, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - 2 * BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE)]
env.food = place_food(env.snake)
env.done = false
env.reward = 0
env.step = 1
env.SCORE = 0
env.DIRECTION = "right"
env.txt = TextActor("Score : $(env.SCORE)", "moonhouse")
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
end
function draw_objects(env::SnakeEnv)
head = env.snake[1]
body = env.snake[2:end]
draw(head, YELLOW, fill=true)
map(x -> draw(x, BLUE), body)
draw(env.food, RED, fill=true)
draw(env.txt)
end
function game_over(env::SnakeEnv)
head = env.snake[1]
if head.x > WIDTH - BLOCKSIZE || head.x < 0 || head.y > HEIGHT - BLOCKSIZE || head.y < 0
return true, -10
end
if env.step > 30 * length(env.snake)
return true, -10
end
if any(map(x -> collide(x, head), env.snake[3:end]))
return true, -100
end
return false, 0
end
function check_danger(env::SnakeEnv, step_size::Int)
# only flags wall danger along the current heading; body segments are not checked here
head = env.snake[1]
danger_right = env.DIRECTION == "right" && (head.x + step_size * BLOCKSIZE > WIDTH - BLOCKSIZE) ? 1.0 : 0.0
danger_left = env.DIRECTION == "left" && (head.x - step_size * BLOCKSIZE) < 0 ? 1.0 : 0.0
danger_up = env.DIRECTION == "up" && (head.y - step_size * BLOCKSIZE) < 0 ? 1.0 : 0.0
danger_down = env.DIRECTION == "down" && (head.y + step_size * BLOCKSIZE) > HEIGHT - BLOCKSIZE ? 1.0 : 0.0
danger_right, danger_left, danger_up, danger_down
end
function move_snake(env::SnakeEnv)
h_x = env.snake[1].x
h_y = env.snake[1].y
if env.DIRECTION == "right"
insert!(env.snake, 1, Rect(h_x + BLOCKSIZE, h_y, BLOCKSIZE, BLOCKSIZE))
elseif env.DIRECTION == "left"
insert!(env.snake, 1, Rect(h_x - BLOCKSIZE, h_y, BLOCKSIZE, BLOCKSIZE))
elseif env.DIRECTION == "up"
insert!(env.snake, 1, Rect(h_x, h_y - BLOCKSIZE, BLOCKSIZE, BLOCKSIZE))
else
insert!(env.snake, 1, Rect(h_x, h_y + BLOCKSIZE, BLOCKSIZE, BLOCKSIZE))
end
end
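# move_snake grows the snake by inserting a new head rect; step() pops the tail
# afterwards unless food was eaten, so net length only changes on food.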
function step(env::SnakeEnv, action::Int)
env.step += 1
if action == 1 && env.DIRECTION != "up"
env.DIRECTION = "down"
elseif action == 2 && env.DIRECTION != "down"
env.DIRECTION = "up"
elseif action == 3 && env.DIRECTION != "left"
env.DIRECTION = "right"
elseif action == 4 && env.DIRECTION != "right"
env.DIRECTION = "left"
end
# update snake position
move_snake(env)
# check game over
env.done, env.reward = game_over(env)
if env.done
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
return env.state, env.reward, env.done, env.SCORE
end
if collide(env.snake[1], env.food)
env.SCORE += 1
env.food = place_food(env.snake)
env.reward = 10
else
pop!(env.snake)
end
env.txt = TextActor("Score : $(env.SCORE)", "moonhouse")
if env.use_first_state
get_state_1(env)
else
get_state_2(env)
end
return env.state, env.reward, env.done, env.SCORE
end
# -------------------- CREATE ENV ---------------------------------
snake_env = [Rect(initial_x, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE),
Rect(initial_x - 2 * BLOCKSIZE, initial_y, BLOCKSIZE, BLOCKSIZE)]
food_env = place_food(snake_env)
env = SnakeEnv(0, "right", false, [0.0], 0.0, true, snake_env, food_env, 1, true, TextActor("Score : 0", "moonhouse"))
reset(env)
state_dim = length(env.state)
action_dim = 4
memory = Memory([], [], [], [], [])
agent = PPO(state_dim, action_dim, 128, 0.001, 0.99, 4, 0.2, 2000, memory)
function draw(g::Game)
if env.allow_render
sleep(0.03)
draw_objects(env)
end
end
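# Note: sleep(0.03) blocks GameZero's single game loop, so updates pause while
# rendering is enabled; this may account for some of the perceived slowness.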
# ---------------------------- loop ------------------------------
episode = 0
total_reward = 0
record = 0
function print_info(episode, total_reward, record)
env.allow_render = true
avg_reward = total_reward / 20
println("Episode : $episode | avg_reward : $avg_reward | record : $record")
end
function onetime_loop()
action = select_action(agent, env.state, false)
next_state, reward, done, SCORE = step(env, action)
if done
env.allow_render = false
reset(env)
end
end
function full_loop()
global episode, total_reward, record
while !env.done
action = select_action(agent, env.state, true)
next_state, reward, done, SCORE = step(env, action)
take_step(agent, reward, done)
total_reward += reward
end
episode += 1
if episode % 20 == 0
print_info(episode, total_reward, record)
total_reward = 0
else
env.allow_render = false
end
if env.SCORE > record
record = env.SCORE
end
reset(env)
end
function update(g::Game)
env.allow_render ? onetime_loop() : full_loop()
end
Please submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.

Exception: EXCEPTION_ACCESS_VIOLATION at 0x6d642194 -- TTF_SizeUTF8_Internal at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1185
in expression starting at REPL[2]:1
TTF_SizeUTF8_Internal at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1185
TTF_RenderUTF8_Blended at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1630
TTF_RenderText_Blended at /workspace/srcdir/SDL2_ttf-2.0.15\SDL_ttf.c:1600
TTF_RenderText_Blended at C:\Users\Administrator\.julia\packages\SimpleDirectMediaLayer\wjMsP\src\LibSDL2.jl:6038 [inlined]
#TextActor#12 at C:\Users\Administrator\.julia\packages\GameZero\q74y7\src\actor.jl:35
TextActor at C:\Users\Administrator\.julia\packages\GameZero\q74y7\src\actor.jl:33 [inlined]
step at C:\Users\Administrator\Desktop\julia\snake.jl:445
full_loop at C:\Users\Administrator\Desktop\julia\snake.jl:515
update at C:\Users\Administrator\Desktop\julia\snake.jl:537
unknown function (ip: 0000022c242fc776)

Please submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.

Exception: UNKNOWN at 0x7ffdd3cc49b9 --
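For what it's worth, the trace points at the TextActor constructor called from step (snake.jl:445), so the fault is probably triggered by rebuilding a TextActor on every step. Here is a minimal sketch that exercises just that pattern (the font name comes from the code above; everything else is illustrative) and might reproduce the crash faster:

using GameZero
using Colors

WIDTH = 200
HEIGHT = 200
BACKGROUND = colorant"white"

frame = 0
txt = TextActor("frame : 0", "moonhouse")

function update(g::Game)
global frame, txt
frame += 1
# a fresh SDL_ttf render every tick, mirroring what step() does in snake.jl
txt = TextActor("frame : $frame", "moonhouse")
end

function draw(g::Game)
draw(txt)
end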
I tried replicating this, but installing Gym fails on my machine 😢 How did you install Gym? You're on Windows, I presume.