minerl
minerl copied to clipboard
Increasing Java heap memory
Question
How can I increase the Java heap memory size?
I'm trying to train an agent in MineRLBasaltFindCave-v0, but training stops after several steps with the error pasted below (the Minecraft worker threads die).
My machine is Ubuntu 24.04, RAM 62GB, and Java 1.8.0_462 installed. MineRL is minerl==1.0.2.
Since only about 14GB of RAM is in use during training, I suspect that the heap size allocated to the JVM is too low.
From htop I see the allocated memory size is 4GB:
java -Xmx4G -jar build/libs/.....
How can I specify this -Xmx setting? Or, is something else causing this error? I don't think the training code does anything special to increase memory usage, like instantiating multiple environments.
I also paste the environment wrapper I wrote for my agent at the end.
Error
[2025-09-23 17:01:39,125][minerl.env.malmo.instance.a822fe][WARNING] - [17:01:39] [Worker-Main-13/WARN]: Worker-Main-13 died
[2025-09-23 17:01:39,126][minerl.env.malmo.instance.a822fe][ERROR] - [17:01:39] [Worker-Main-13/ERROR]: Caught exception in thread Thread[Worker-Main-13,5,main]
[2025-09-23 17:01:39,509][minerl.env.malmo.instance.a822fe][WARNING] - [17:01:38] [Worker-Main-24/WARN]: Worker-Main-24 died
[2025-09-23 17:01:39,510][minerl.env.malmo.instance.a822fe][WARNING] - [17:01:36] [Worker-Main-20/WARN]: Worker-Main-20 died
[2025-09-23 17:01:39,510][minerl.env.malmo.instance.a822fe][ERROR] - [17:01:39] [Worker-Main-20/ERROR]: Caught exception in thread Thread[Worker-Main-20,5,main]
[2025-09-23 17:01:39,512][minerl.env.malmo.instance.a822fe][ERROR] - [17:01:39] [Worker-Main-24/ERROR]: Caught exception in thread Thread[Worker-Main-24,5,main]
Error executing job with overrides: ['task=find-cave', 'wandb_project=tdmpc2-minerl', 'obs=rgb']
Traceback (most recent call last):
File "/home/sensho/tdmpc2-minerl/tdmpc2/train.py", line 60, in train
trainer.train()
File "/home/sensho/tdmpc2-minerl/tdmpc2/trainer/online_trainer.py", line 103, in train
obs = self.env.reset()
File "/home/sensho/tdmpc2-minerl/tdmpc2/envs/wrappers/tensor.py", line 35, in reset
return self._obs_to_tensor(self.env.reset())
File "/home/sensho/tdmpc2-minerl/tdmpc2/envs/minerl.py", line 86, in reset
obs = super().reset()
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/gym/core.py", line 283, in reset
return self.env.reset(**kwargs)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/gym/wrappers/time_limit.py", line 26, in reset
return self.env.reset(**kwargs)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/gym/wrappers/order_enforcing.py", line 18, in reset
return self.env.reset(**kwargs)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/herobraine/env_specs/basalt_specs.py", line 78, in reset
return self.env.reset()
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/herobraine/env_specs/basalt_specs.py", line 57, in reset
return super().reset()
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/gym/core.py", line 283, in reset
return self.env.reset(**kwargs)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/env/_singleagent.py", line 22, in reset
multi_obs = super().reset()
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/env/_multiagent.py", line 446, in reset
self._send_mission(self.instances[0], agent_xmls[0], self._get_token(0, ep_uid)) # Master
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/env/_multiagent.py", line 605, in _send_mission
reply = comms.recv_message(instance.client_socket)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/env/comms.py", line 63, in recv_message
lengthbuf = recvall(sock, 4)
File "/home/sensho/anaconda3/envs/tdmpc2/lib/python3.9/site-packages/minerl/env/comms.py", line 73, in recvall
newbuf = sock.recv(count)
socket.timeout: timed out
Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace.
[2025-09-23 17:05:12,328][minerl.env.malmo][ERROR] - Attempted to send kill command to minecraft process and failed with exception timed out
[2025-09-23 17:05:12,337][process_watcher][INFO] - About to reap process tree of 336680:launchClient.sh:/usr/bin/bash i sleeping, owner 336451, printing process tree status in termination order:
[2025-09-23 17:05:12,337][process_watcher][INFO] - -336683:java:/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java i sleeping, owner 336680
[2025-09-23 17:05:12,337][process_watcher][INFO] - -336680:launchClient.sh:/usr/bin/bash i sleeping, owner 336451
[2025-09-23 17:05:12,337][process_watcher][INFO] - Trying to SIGTERM 336683:java:/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java i sleeping, owner 336680
[2025-09-23 17:05:17,370][process_watcher][INFO] - Process 336683 survived SIGTERM; trying SIGKILL on 336683:java:/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java i sleeping, owner 336680
[2025-09-23 17:05:17,546][process_watcher][INFO] - Process psutil.Process(pid=336683, name='java', status='terminated', started='16:55:25') terminated with exit code None
[2025-09-23 17:05:17,547][process_watcher][INFO] - Trying to SIGTERM 336680:launchClient.sh:/usr/bin/bash i zombie, owner 336451
[2025-09-23 17:05:17,548][process_watcher][INFO] - Process psutil.Popen(pid=336680, name='launchClient.sh', status='terminated', exitcode=0) terminated with exit code 0
Wrapper
import os, sys
import gym, minerl
import numpy as np
import cv2
import torch
from termcolor import cprint
# Short task names (looked up via ``cfg.task`` in ``make_env``) mapped to the
# environment ids that are handed to ``gym.make``.
MINERL_TASKS = {
    "find-cave": "MineRLBasaltFindCave-v0",
    "create-animal-pen": "MineRLBasaltCreateVillageAnimalPen-v0",
    "make-waterfall": "MineRLBasaltMakeWaterfall-v0",
    "build-house": "MineRLBasaltBuildVillageHouse-v0",
}
def get_binary_action_keys(env):
    """Return the names of all two-valued discrete sub-actions of ``env``.

    Walks ``env.action_space.items()`` in iteration order. Every
    ``gym.spaces.Discrete`` entry must have exactly two values and is
    collected; the only non-discrete entry tolerated is the one named
    ``"camera"``, which is skipped.

    Args:
        env: Environment whose ``action_space`` is a mapping from action
            name to sub-space.

    Returns:
        List of the discrete action names, in iteration order.

    Raises:
        AssertionError: If a discrete entry is not binary, or a non-discrete
            entry other than ``"camera"`` is present.
    """
    collected = []
    for k, v in env.action_space.items():
        if not isinstance(v, gym.spaces.Discrete):
            # Anything that is not a discrete switch has to be the camera.
            assert k == "camera", f"Unexpected non-binary action space for {k}"
            continue
        assert v.n == 2, f"Expected binary action space for {k}, got {v}"
        collected.append(k)
    return collected
class MineRLWrapper(gym.Wrapper):
    """Expose a dict-action MineRL env through a flat-vector, image-only API.

    Observations are reduced to the ``"pov"`` frame, cropped horizontally to
    (roughly) a square, resized to ``obs_size`` x ``obs_size`` and returned
    channel-first as a ``torch`` tensor.

    Actions are flat vectors: the first ``CAMERA_DIMS`` entries drive the
    camera and every remaining entry drives one binary key, in the order of
    ``self.binary_action_keys``.
    """

    # Number of leading entries of the flat action vector used for the camera.
    CAMERA_DIMS = 2

    def __init__(self, env, obs_size=64):
        """Wrap ``env``.

        Args:
            env: Environment to wrap. Must expose ``_max_episode_steps`` and
                an ``action_space`` accepted by ``get_binary_action_keys``.
            obs_size: Side length, in pixels, of the square output frame.
        """
        super().__init__(env)
        self.obs_size = obs_size
        self.max_episode_steps = env._max_episode_steps
        self.camera_action_key = "camera"
        self.binary_action_keys = get_binary_action_keys(env)

    @property
    def observation_space(self):
        """Box describing the ``(3, obs_size, obs_size)`` uint8 output frame."""
        return gym.spaces.Box(
            low=0,
            high=255,
            shape=(3, self.obs_size, self.obs_size),
            dtype=np.uint8,
        )

    @property
    def action_space(self):
        """Box for the flat action vector accepted by ``step``.

        The length is derived from the wrapped env (camera entries plus one
        entry per binary key) instead of being hard-coded, so it always
        matches what ``_convert_action`` expects.
        """
        # NOTE(review): `_convert_action` passes incoming vectors through a
        # sigmoid, so values sampled from this [0, 1] box land in roughly
        # [0.5, 0.73] afterwards. Confirm whether agent outputs are meant to
        # be logits and whether these bounds should reflect that.
        action_dim = self.CAMERA_DIMS + len(self.binary_action_keys)
        return gym.spaces.Box(
            low=0.0, high=1.0, shape=(action_dim,), dtype=np.float32
        )

    def _preprocess_obs(self, obs):
        """Crop the sides, resize, and transpose an image observation.

        Args:
            obs: Image array indexed as ``(height, width, channels)``.

        Returns:
            ``torch`` tensor indexed as ``(channels, obs_size, obs_size)``.
        """
        height, width, _ = obs.shape
        if width > height:
            # Drop the same number of columns from the left and right edges.
            crop_size = (width - height) // 2
            obs = obs[:, crop_size : width - crop_size]
        obs = cv2.resize(
            obs, (self.obs_size, self.obs_size), interpolation=cv2.INTER_AREA
        )
        return torch.from_numpy(np.transpose(obs, (2, 0, 1)))

    def _convert_action(self, action):
        """Convert a flat action vector into MineRL's dictionary format.

        Every entry is squashed with a sigmoid. The camera entries are then
        rescaled from (0, 1) to (-180, 180); each remaining entry becomes 1
        if its squashed value exceeds 0.5, else 0.

        Args:
            action: Array-like of length
                ``CAMERA_DIMS + len(self.binary_action_keys)``.

        Returns:
            Dict mapping each binary key to 0/1 and the camera key to a
            length-``CAMERA_DIMS`` array.

        Raises:
            AssertionError: If the number of non-camera entries does not
                match the number of binary keys.
        """
        # Accept lists and other array-likes, not just ndarrays.
        action = np.asarray(action)
        action = 1 / (1 + np.exp(-action))  # sigmoid
        camera_action = action[: self.CAMERA_DIMS]
        binary_actions = action[self.CAMERA_DIMS :]
        camera_action = (camera_action - 0.5) * 360.0
        binary_actions = (binary_actions > 0.5).astype(int)
        assert len(binary_actions) == len(self.binary_action_keys), (
            f"Expected {len(self.binary_action_keys)} binary action entries, "
            f"got {len(binary_actions)}"
        )
        action_dict = dict(zip(self.binary_action_keys, binary_actions))
        action_dict[self.camera_action_key] = camera_action
        return action_dict

    def reset(self):
        """Reset the wrapped env and return the preprocessed ``"pov"`` frame."""
        obs = super().reset()
        return self._preprocess_obs(obs["pov"])

    def step(self, action):
        """Apply a flat action vector and return the preprocessed result.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the
            preprocessed ``"pov"`` frame and the other items are passed
            through unchanged from the wrapped env.
        """
        action = self._convert_action(action)
        obs, reward, done, info = super().step(action)
        return self._preprocess_obs(obs["pov"]), reward, done, info
def make_env(cfg):
    """Create and return a wrapped MineRL environment for ``cfg``.

    Args:
        cfg: Configuration object with a ``task`` attribute (a key of
            ``MINERL_TASKS``) and an ``obs`` attribute (must be ``"rgb"``).

    Returns:
        The environment built by ``gym.make`` for the selected task, wrapped
        in ``MineRLWrapper``.

    Raises:
        ValueError: If ``cfg.task`` is not a key of ``MINERL_TASKS``.
        AssertionError: If ``cfg.obs`` is not ``"rgb"``.
    """
    if cfg.task not in MINERL_TASKS:
        # Single formatted message; passing two args to ValueError would
        # render as a tuple repr.
        raise ValueError(f"Unknown task: {cfg.task}")
    assert cfg.obs == "rgb", "MineRL only supports rgb observations."
    env = gym.make(MINERL_TASKS[cfg.task])
    return MineRLWrapper(env)