ESA Herschel download_data connection error
Hi, I'm working on downloading all the observations for a small set of targets which includes Arp220. This is over 50 observations, more than 10 GB of data, and takes more than 20 minutes. However, the archive throws a series of connection errors after about 20 minutes of downloading data. A minimal example is below, with the resulting full traceback. It would be nice if either the archive didn't drop the connection or the errors raised by astroquery were simpler to catch. As it stands, I can't figure out which exception to catch.
Minimal example:
from astropy.coordinates import SkyCoord
from astroquery.esa.hsa import HSA
from astroquery.exceptions import LoginError
import pandas as pd
from astropy import units as u
from astropy.table import Table
# Reproduction script: for each target, look up the Herschel observation ids
# per instrument through the HSA TAP service, then download every observation.

# Build the sample: one sky position and one label per target.
coords = [
    SkyCoord(233.73856, 23.50321, unit=u.deg),
    SkyCoord(150.091, 2.2745833, unit=u.deg),
]
labels = ["Arp220", "COSMOS1"]
sample_table = Table([coords, labels], names=['coord', 'label'])

# First find the object ids from Herschel.
# query_hsa_tap doesn't accept an upload_table, so do this as a for loop over
# each instrument and object.
# NOTE(review): this value is inserted into the ADQL circle() call unconverted.
# circle() radii are presumably expressed in degrees -- confirm whether 1.1 is
# really meant as arcseconds before relying on the match radius.
search_radius_arcsec = 1.1
datadir = 'data/herschel'

for stab in sample_table:
    search_coords = stab["coord"]
    print("working on object", stab["label"])

    # The cone is the same for both instruments, so format it once per target.
    ra_deg = str(search_coords.ra.deg)
    dec_deg = str(search_coords.dec.deg)

    for instrument_name in ['PACS', 'SPIRE']:
        querystring = (
            "select observation_id from hsa.v_active_observation "
            "join hsa.instrument using (instrument_oid) "
            "where contains(point('ICRS', hsa.v_active_observation.ra, "
            "hsa.v_active_observation.dec), "
            f"circle('ICRS', {ra_deg}, {dec_deg}, {search_radius_arcsec}))=1 "
            f"and hsa.instrument.instrument_name='{instrument_name}'"
        )
        objectid_table = HSA.query_hsa_tap(querystring)

        # download_data only accepts one observation_id, so loop over each
        # observation_id returned by the query.
        for raw_id in objectid_table['observation_id']:
            observation_id = str(raw_id)
            try:
                HSA.download_data(
                    observation_id=observation_id,
                    retrieval_type='OBSERVATION',
                    instrument_name=instrument_name,
                    product_level="LEVEL2, LEVEL_2_5, LEVEL_3",
                    download_dir=datadir,
                )
            except LoginError:
                # Only the login failure is handled; the loop then moves on to
                # the next observation.
                # NOTE(review): a connection dropped mid-download is NOT caught
                # here. It has been seen to surface as ChunkedEncodingError
                # raised from requests' Response.iter_content, which propagates
                # out of this loop and stops the remaining downloads.
                print("This observation is proprietary, which might mean that it is calibration data")
full traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:761, in HTTPResponse._update_chunk_length(self)
760 try:
--> 761 self.chunk_left = int(line, 16)
762 except ValueError:
763 # Invalid chunked protocol response, abort.
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
InvalidChunkLength Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:444, in HTTPResponse._error_catcher(self)
443 try:
--> 444 yield
446 except SocketTimeout:
447 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
448 # there is yet no clean way to get at it from this context.
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:828, in HTTPResponse.read_chunked(self, amt, decode_content)
827 while True:
--> 828 self._update_chunk_length()
829 if self.chunk_left == 0:
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:765, in HTTPResponse._update_chunk_length(self)
764 self.close()
--> 765 raise InvalidChunkLength(self, line)
InvalidChunkLength: InvalidChunkLength(got length b'', 0 bytes read)
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/requests/models.py:816, in Response.iter_content.<locals>.generate()
815 try:
--> 816 yield from self.raw.stream(chunk_size, decode_content=True)
817 except ProtocolError as e:
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:624, in HTTPResponse.stream(self, amt, decode_content)
623 if self.chunked and self.supports_chunked_reads():
--> 624 for line in self.read_chunked(amt, decode_content=decode_content):
625 yield line
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:816, in HTTPResponse.read_chunked(self, amt, decode_content)
811 raise BodyNotHttplibCompatible(
812 "Body should be http.client.HTTPResponse like. "
813 "It should have have an fp attribute which returns raw chunks."
814 )
--> 816 with self._error_catcher():
817 # Don't bother reading the body of a HEAD request.
818 if self._original_response and is_response_to_head(self._original_response):
File /opt/conda/lib/python3.11/contextlib.py:155, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
154 try:
--> 155 self.gen.throw(typ, value, traceback)
156 except StopIteration as exc:
157 # Suppress StopIteration *unless* it's the same exception that
158 # was passed to throw(). This prevents a StopIteration
159 # raised inside the "with" statement from being suppressed.
File /opt/conda/lib/python3.11/site-packages/urllib3/response.py:461, in HTTPResponse._error_catcher(self)
459 except (HTTPException, SocketError) as e:
460 # This includes IncompleteRead.
--> 461 raise ProtocolError("Connection broken: %r" % e, e)
463 # If no exception is thrown, we should avoid cleaning up
464 # unnecessarily.
ProtocolError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
During handling of the above exception, another exception occurred:
ChunkedEncodingError Traceback (most recent call last)
Cell In[1], line 36
34 observation_id = str(objectid_table[tab_id]['observation_id'])
35 try:
---> 36 HSA.download_data(observation_id=observation_id, retrieval_type='OBSERVATION',
37 instrument_name=instrument_name, product_level = "LEVEL2, LEVEL_2_5, LEVEL_3", download_dir = datadir)
38 except LoginError:
39 print("This observation is proprietary, which might mean that it is calibration data")
File /opt/conda/lib/python3.11/site-packages/astroquery/esa/hsa/core.py:127, in HSAClass.download_data(self, retrieval_type, observation_id, instrument_name, filename, observation_oid, instrument_oid, product_level, verbose, download_dir, cache, **kwargs)
123 filename += "".join(suffixes)
125 filename = os.path.join(download_dir, filename)
--> 127 self._download_file(link, filename, head_safe=True, cache=cache)
129 if verbose:
130 log.info(f"Wrote {link} to {filename}")
File /opt/conda/lib/python3.11/site-packages/astroquery/query.py:498, in BaseQuery._download_file(self, url, local_filepath, timeout, auth, continuation, cache, method, head_safe, **kwargs)
495 with ProgressBarOrSpinner(length, f'Downloading URL {url} to {local_filepath} ...',
496 file=progress_stream) as pb:
497 with open(local_filepath, open_mode) as f:
--> 498 for block in response.iter_content(blocksize):
499 f.write(block)
500 bytes_read += len(block)
File /opt/conda/lib/python3.11/site-packages/requests/models.py:818, in Response.iter_content.<locals>.generate()
816 yield from self.raw.stream(chunk_size, decode_content=True)
817 except ProtocolError as e:
--> 818 raise ChunkedEncodingError(e)
819 except DecodeError as e:
820 raise ContentDecodingError(e)
ChunkedEncodingError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
cc @jespinosaar
Hi @jkrick, many thanks for informing us about this. I have been trying the same code and I could download everything you have requested without any issue. Maybe there were some connectivity issues at our premises while you executed it. Can you please try again? Thanks!
@jkrick is this still an outstanding issue? If not, please close it. Thanks @jespinosaar for checking!
So sorry, this completely dropped off my radar. Thanks for the ping. It does indeed now work for me in its entirety, so I guess it was a temporary problem.