exo
exo copied to clipboard
RuntimeError: Wait timeout: 10000 ms (local run)
Traceback (most recent call last):
File "/home/ffamax/exo/exo/api/chatgpt_api.py", line 273, in handle_post_chat_completions
await asyncio.wait_for(self.node.process_prompt(shard, prompt, image_str, request_id=request_id), timeout=self.response_timeout)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/asyncio/tasks.py", line 520, in wait_for
return await fut
^^^^^^^^^
File "/home/ffamax/exo/exo/orchestration/standard_node.py", line 98, in process_prompt
resp = await self._process_prompt(base_shard, prompt, image_str, request_id, inference_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/exo/exo/orchestration/standard_node.py", line 134, in _process_prompt
result, inference_state, is_finished = await self.inference_engine.infer_prompt(request_id, shard, prompt, image_str, inference_state=inference_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/exo/exo/inference/tinygrad/inference.py", line 59, in infer_prompt
await self.ensure_shard(shard)
File "/home/ffamax/exo/exo/inference/tinygrad/inference.py", line 97, in ensure_shard
self.model = await asyncio.get_event_loop().run_in_executor(self.executor, build_transformer, model_path, shard, "8B" if "8b" in shard.model_id.lower() else "70B")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/exo/exo/inference/tinygrad/inference.py", line 48, in build_transformer
load_state_dict(model, weights, strict=False, consume=False) # consume=True
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/nn/state.py", line 129, in load_state_dict
else: v.replace(state_dict[k].to(v.device)).realize()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/tensor.py", line 3500, in _wrapper
ret = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/tensor.py", line 213, in realize
run_schedule(*self.schedule_with_vars(*lst), do_update_stats=do_update_stats)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 224, in run_schedule
ei.run(var_vals, do_update_stats=do_update_stats)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 174, in run
et = self.prg(bufs, var_vals if var_vals is not None else {}, wait=wait or DEBUG >= 2)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 140, in __call__
self.copy(dest, src)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 135, in copy
dest.copyin(src.as_buffer(allow_zero_copy=True)) # may allocate a CPU buffer depending on allow_zero_copy
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/device.py", line 114, in as_buffer
return self.copyout(memoryview(bytearray(self.nbytes)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/device.py", line 125, in copyout
self.allocator.copyout(mv, self._buf)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/device.py", line 657, in copyout
self.device.synchronize()
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/device.py", line 519, in synchronize
self.timeline_signal.wait(self.timeline_value - 1)
File "/home/ffamax/miniconda3/envs/.venv.py3.12/lib/python3.12/site-packages/tinygrad/device.py", line 424, in wait
raise RuntimeError(f"Wait timeout: {timeout} ms! (the signal is not set to {value}, but {self.value})")
RuntimeError: Wait timeout: 10000 ms! (the signal is not set to 19, but 0)
Deregister callback_id='chatgpt-api-wait-response-b71dd1bf-c1f7-4ea5-a626-1ddd6febcaf1' deregistered_callback=None