datajoint-python
datajoint-python copied to clipboard
S3 external files OS Error
trafficstars
I am trying to retrieve external blobs stored in an S3-compatible Linode (Akamai) bucket. This works fine most of the time, but larger blobs seem to trigger the following error during fetch:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:444, in HTTPResponse._error_catcher(self)
443 try:
--> 444 yield
446 except SocketTimeout:
447 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
448 # there is yet no clean way to get at it from this context.
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:567, in HTTPResponse.read(self, amt, decode_content, cache_content)
566 with self._error_catcher():
--> 567 data = self._fp_read(amt) if not fp_closed else b""
568 if amt is None:
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:533, in HTTPResponse._fp_read(self, amt)
531 else:
532 # StringIO doesn't like amt=None
--> 533 return self._fp.read(amt) if amt is not None else self._fp.read()
File ~/miniconda3/envs/octo_code/lib/python3.10/http/client.py:481, in HTTPResponse.read(self, amt)
480 try:
--> 481 s = self._safe_read(self.length)
482 except IncompleteRead:
File ~/miniconda3/envs/octo_code/lib/python3.10/http/client.py:630, in HTTPResponse._safe_read(self, amt)
624 """Read the number of bytes requested.
625
626 This function should be used when <amt> bytes "should" be present for
627 reading. If the bytes are truly not available (due to EOF), then the
628 IncompleteRead exception can be used to detect the problem.
629 """
--> 630 data = self.fp.read(amt)
631 if len(data) < amt:
File ~/miniconda3/envs/octo_code/lib/python3.10/socket.py:705, in SocketIO.readinto(self, b)
704 try:
--> 705 return self._sock.recv_into(b)
706 except timeout:
OSError: [Errno 22] Invalid argument
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
Cell In[34], line 1
----> 1 data_to_export = (CuttleHeadMantle & key).fetch1(data2retrieve)
2 additional_data = (CuttleHeadMantle.Behavior & key).fetch1(additional_data_label)
4 len(data_to_export)
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:346, in Fetch1.__call__(self, squeeze, download_path, *attrs)
344 else: # fetch some attributes, return as tuple
345 attributes = [a for a in attrs if not is_key(a)]
--> 346 result = self._expression.proj(*attributes).fetch(
347 squeeze=squeeze, download_path=download_path, format="array"
348 )
349 if len(result) != 1:
350 raise DataJointError(
351 "fetch1 should only return one tuple. %d tuples found" % len(result)
352 )
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:289, in Fetch.__call__(self, offset, limit, order_by, format, as_dict, squeeze, download_path, *attrs)
286 raise e
287 for name in heading:
288 # unpack blobs and externals
--> 289 ret[name] = list(map(partial(get, heading[name]), ret[name]))
290 if format == "frame":
291 ret = pandas.DataFrame(ret).set_index(heading.primary_key)
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:111, in _get(connection, attr, data, squeeze, download_path)
103 safe_write(local_filepath, data.split(b"\0", 1)[1])
104 return adapt(str(local_filepath)) # download file from remote store
106 return adapt(
107 uuid.UUID(bytes=data)
108 if attr.uuid
109 else (
110 blob.unpack(
--> 111 extern.get(uuid.UUID(bytes=data)) if attr.is_external else data,
112 squeeze=squeeze,
113 )
114 if attr.is_blob
115 else data
116 )
117 )
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/external.py:203, in ExternalTable.get(self, uuid)
201 if blob is None:
202 try:
--> 203 blob = self._download_buffer(self._make_uuid_path(uuid))
204 except MissingExternalFile:
205 if not SUPPORT_MIGRATED_BLOBS:
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/external.py:142, in ExternalTable._download_buffer(self, external_path)
140 def _download_buffer(self, external_path):
141 if self.spec["protocol"] == "s3":
--> 142 return self.s3.get(external_path)
143 if self.spec["protocol"] == "file":
144 return Path(external_path).read_bytes()
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/s3.py:71, in Folder.get(self, name)
69 logger.debug("get: {}:{}".format(self.bucket, name))
70 try:
---> 71 return self.client.get_object(self.bucket, str(name)).data
72 except minio.error.S3Error as e:
73 if e.code == "NoSuchKey":
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:306, in HTTPResponse.data(self)
303 return self._body
305 if self._fp:
--> 306 return self.read(cache_content=True)
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:566, in HTTPResponse.read(self, amt, decode_content, cache_content)
563 flush_decoder = False
564 fp_closed = getattr(self._fp, "closed", False)
--> 566 with self._error_catcher():
567 data = self._fp_read(amt) if not fp_closed else b""
568 if amt is None:
File ~/miniconda3/envs/octo_code/lib/python3.10/contextlib.py:153, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
151 value = typ()
152 try:
--> 153 self.gen.throw(typ, value, traceback)
154 except StopIteration as exc:
155 # Suppress StopIteration *unless* it's the same exception that
156 # was passed to throw(). This prevents a StopIteration
157 # raised inside the "with" statement from being suppressed.
158 return exc is not value
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:461, in HTTPResponse._error_catcher(self)
457 raise ReadTimeoutError(self._pool, None, "Read timed out.")
459 except (HTTPException, SocketError) as e:
460 # This includes IncompleteRead.
--> 461 raise ProtocolError("Connection broken: %r" % e, e)
463 # If no exception is thrown, we should avoid cleaning up
464 # unnecessarily.
465 clean_exit = True
ProtocolError: ("Connection broken: OSError(22, 'Invalid argument')", OSError(22, 'Invalid argument'))
Reproducibility
Include:
- OS: MacOS 13.3.1
- Python 3.10
- MySQL 8.0
- MySQL Deployment Strategy: Docker
- DataJoint Python 0.13.8
I have also opened a related issue on the MinIO Python client (minio-py): https://github.com/minio/minio-py/issues/1280