prefect
prefect copied to clipboard
Filesystem errors cause workflow to fail
First check
- [X] I added a descriptive title to this issue.
- [X] I used the GitHub search to find a similar issue and didn't find it.
- [X] I searched the Prefect documentation for this issue.
- [X] I checked that this issue is related to Prefect and not one of its dependencies.
Bug summary
If a Prefect task fails to write its state to the attached remote filesystem, the whole workflow crashes. I believe such a failure should count against the task's retries, or at least the write should be attempted again; if it still fails, the task should be marked as failed without causing the whole workflow to fail or get stuck.
Reproduction
-
Error
File "/usr/local/lib/python3.10/dist-packages/prefect/engine.py", line 1076, in begin_task_run
return await orchestrate_task_run(
File "/usr/local/lib/python3.10/dist-packages/prefect/engine.py", line 1199, in orchestrate_task_run
terminal_state = await return_value_to_state(
File "/usr/local/lib/python3.10/dist-packages/prefect/states.py", line 250, in return_value_to_state
return Completed(data=await result_factory.create_result(retval))
File "/usr/local/lib/python3.10/dist-packages/prefect/results.py", line 302, in create_result
return await PersistedResult.create(
File "/usr/local/lib/python3.10/dist-packages/prefect/results.py", line 434, in create
await storage_block.write_path(key, content=blob.to_bytes())
File "/usr/local/lib/python3.10/dist-packages/prefect/filesystems.py", line 381, in write_path
self.filesystem.makedirs(dirpath, exist_ok=True)
File "/usr/local/lib/python3.10/dist-packages/fsspec/asyn.py", line 111, in wrapper
return sync(self.loop, func, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/fsspec/asyn.py", line 96, in sync
raise return_result
File "/usr/local/lib/python3.10/dist-packages/fsspec/asyn.py", line 53, in _runner
result[0] = await coro
File "/usr/local/lib/python3.10/dist-packages/s3fs/core.py", line 840, in _makedirs
await self._mkdir(path, create_parents=True)
File "/usr/local/lib/python3.10/dist-packages/s3fs/core.py", line 825, in _mkdir
await self._call_s3("create_bucket", **params)
File "/usr/local/lib/python3.10/dist-packages/s3fs/core.py", line 338, in _call_s3
return await _error_wrapper(
File "/usr/local/lib/python3.10/dist-packages/s3fs/core.py", line 138, in _error_wrapper
raise err
File "/usr/local/lib/python3.10/dist-packages/s3fs/core.py", line 111, in _error_wrapper
return await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/client.py", line 341, in _make_api_call
http, parsed_response = await self._make_request(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/client.py", line 366, in _make_request
return await self._endpoint.make_request(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/endpoint.py", line 100, in _send_request
while await self._needs_retry(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/endpoint.py", line 262, in _needs_retry
responses = await self._event_emitter.emit(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/hooks.py", line 66, in _emit
response = await resolve_awaitable(handler(**kwargs))
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/_helpers.py", line 15, in resolve_awaitable
return await obj
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/retryhandler.py", line 107, in _call
if await resolve_awaitable(self._checker(**checker_kwargs)):
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/_helpers.py", line 15, in resolve_awaitable
return await obj
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/retryhandler.py", line 126, in _call
should_retry = await self._should_retry(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/retryhandler.py", line 165, in _should_retry
return await resolve_awaitable(
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/_helpers.py", line 15, in resolve_awaitable
return await obj
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/retryhandler.py", line 174, in _call
checker(attempt_number, response, caught_exception)
File "/usr/local/lib/python3.10/dist-packages/botocore/retryhandler.py", line 247, in __call__
return self._check_caught_exception(
File "/usr/local/lib/python3.10/dist-packages/botocore/retryhandler.py", line 416, in _check_caught_exception
raise caught_exception
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/endpoint.py", line 181, in _do_get_response
http_response = await self._send(request)
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/endpoint.py", line 285, in _send
return await self.http_session.send(request)
File "/usr/local/lib/python3.10/dist-packages/aiobotocore/httpsession.py", line 246, in send
raise EndpointConnectionError(endpoint_url=request.url, error=e)
EndpointConnectionError('Could not connect to the endpoint URL: "http://s3-bucket-service"')
Versions
Version: 2.6.3
API version: 0.8.2
Python version: 3.10.7
Git commit: 9e7da96e
Built: Tue, Oct 18, 2022 1:55 PM
OS/Arch: linux/x86_64
Profile: default
Server type: cloud
Additional context
No response