uproot5
uproot5 copied to clipboard
`ClientOSError` during repeat file read with `uproot.dask`
During debugging I ran into some surprising crashes. I was processing the same file multiple times in a setup that can be broken down to this:
import awkward as ak
import uproot
# Remote HTTPS location of the input file; per the report, the failure does not
# reproduce when the file is read from local disk.
prefix = "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD/TT_TuneCUETP8M1_13TeV-powheg-pythia8/"
fname = prefix + "cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1_00000_0000.root"

# Read the very same remote file twice in a row: the first pass succeeds, the
# second pass is the one reported to crash.
for fname_ in [fname, fname]:
    print(f"reading {fname_}")
    # Build a lazy dask-awkward collection over the "Events" tree, one
    # partition per file.
    evts = uproot.dask({fname_: "Events"}, steps_per_file=1)
    # Force the actual remote read; the value itself is irrelevant here.
    ak.sum(evts.event).compute()
The first loop iteration is fine, but the second one results in a crash:
TypeError: 'ClientOSError' object is not subscriptable
The error is not reproducible with local files for me.
I am using uproot version 5.3.10.
Full trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[4], line 10
8 print(f"reading {fname}")
9 evts = uproot.dask({fname_: "Events"}, steps_per_file=1)
---> 10 ak.sum(evts.event).compute()
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/dask/base.py:375, in DaskMethodsMixin.compute(self, **kwargs)
351 def compute(self, **kwargs):
352 """Compute this dask collection
353
354 This turns a lazy Dask collection into its in-memory equivalent.
(...)
373 dask.compute
374 """
--> 375 (result,) = compute(self, traverse=False, **kwargs)
376 return result
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/dask/base.py:661, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
658 postcomputes.append(x.__dask_postcompute__())
660 with shorten_traceback():
--> 661 results = schedule(dsk, keys, **kwargs)
663 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:1227, in _UprootRead.__call__(self, i_start_stop)
1215 except self.allowed_exceptions as err:
1216 return (
1217 self.mock_empty(backend="cpu"),
1218 _report_failure(
(...)
1224 ),
1225 )
-> 1227 result, _ = self._call_impl(i, start, stop)
1228 return result
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:1231, in _UprootRead._call_impl(self, i, start, stop)
1230 def _call_impl(self, i, start, stop):
-> 1231 return self.read_tree(
1232 self.ttrees[i],
1233 start,
1234 stop,
1235 )
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:985, in UprootReadMixin.read_tree(self, tree, start, stop)
980 nplike = Numpy.instance()
982 # The remap implementation should correctly populate the generated
983 # buffer mapping in __call__, such that the high-level form can be
984 # used in `from_buffers`
--> 985 mapping = self.form_mapping_info.load_buffers(
986 tree,
987 self.common_keys,
988 start,
989 stop,
990 self.decompression_executor,
991 self.interpretation_executor,
992 self.interp_options,
993 )
995 # Populate container with placeholders if keys aren't required
996 # Otherwise, read from disk
997 container = {}
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:908, in TrivialFormMappingInfo.load_buffers(self, tree, keys, start, stop, decompression_executor, interpretation_executor, options)
897 def load_buffers(
898 self,
899 tree: HasBranches,
(...)
906 ) -> Mapping[str, AwkArray]:
907 # First, let's read the arrays as a tuple (to associate with each key)
--> 908 arrays = tree.arrays(
909 keys,
910 entry_start=start,
911 entry_stop=stop,
912 ak_add_doc=options["ak_add_doc"],
913 decompression_executor=decompression_executor,
914 interpretation_executor=interpretation_executor,
915 how=tuple,
916 )
918 awkward = uproot.extras.awkward()
920 # The subform generated by awkward.to_buffers() has different form keys
921 # from those used to perform buffer projection. However, the subform
922 # structure should be identical to the projection optimisation
923 # subform, as they're derived from `branch.interpretation.awkward_form`
924 # Therefore, we can correlate the subform keys using `expected_from_buffers`
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:823, in HasBranches.arrays(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, decompression_executor, interpretation_executor, array_cache, library, ak_add_doc, how)
820 ranges_or_baskets.append((branch, basket_num, range_or_basket))
822 interp_options = {"ak_add_doc": ak_add_doc}
--> 823 _ranges_or_baskets_to_arrays(
824 self,
825 ranges_or_baskets,
826 branchid_interpretation,
827 entry_start,
828 entry_stop,
829 decompression_executor,
830 interpretation_executor,
831 library,
832 arrays,
833 False,
834 interp_options,
835 )
837 # no longer needed; save memory
838 del ranges_or_baskets
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:3105, in _ranges_or_baskets_to_arrays(hasbranches, ranges_or_baskets, branchid_interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, library, arrays, update_ranges_or_baskets, interp_options)
3102 pass
3104 elif isinstance(obj, tuple) and len(obj) == 3:
-> 3105 uproot.source.futures.delayed_raise(*obj)
3107 else:
3108 raise AssertionError(obj)
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/futures.py:38, in delayed_raise(exception_class, exception_value, traceback)
34 def delayed_raise(exception_class, exception_value, traceback):
35 """
36 Raise an exception from a background thread on the main thread.
37 """
---> 38 raise exception_value.with_traceback(traceback)
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:3026, in _ranges_or_baskets_to_arrays.<locals>.chunk_to_basket(chunk, branch, basket_num)
3024 try:
3025 cursor = uproot.source.cursor.Cursor(chunk.start)
-> 3026 basket = uproot.models.TBasket.Model_TBasket.read(
3027 chunk,
3028 cursor,
3029 {"basket_num": basket_num},
3030 hasbranches._file,
3031 hasbranches._file,
3032 branch,
3033 )
3034 original_index = range_original_index[(chunk.start, chunk.stop)]
3035 if update_ranges_or_baskets:
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/model.py:854, in Model.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
852 forth_obj.add_node(forth_stash)
853 forth_obj.push_active_node(forth_stash)
--> 854 self.read_members(chunk, cursor, context, file)
855 if forth_obj is not None:
856 forth_obj.pop_active_node()
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/models/TBasket.py:227, in Model_TBasket.read_members(self, chunk, cursor, context, file)
217 assert isinstance(self._parent, uproot.behaviors.TBranch.TBranch)
218 self._basket_num = context.get("basket_num")
220 (
221 self._members["fNbytes"],
222 self._key_version,
223 self._members["fObjlen"],
224 self._members["fDatime"],
225 self._members["fKeylen"],
226 self._members["fCycle"],
--> 227 ) = cursor.fields(chunk, _tbasket_format1, context)
229 # skip the class name, name, and title
230 cursor.move_to(
231 self._cursor.index + self._members["fKeylen"] - _tbasket_format2.size - 1
232 )
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/cursor.py:201, in Cursor.fields(self, chunk, format, context, move)
199 if move:
200 self._index = stop
--> 201 return format.unpack(chunk.get(start, stop, self, context))
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/chunk.py:446, in Chunk.get(self, start, stop, cursor, context)
424 """
425 Args:
426 start (int): Seek position of the first byte to include.
(...)
443 already.
444 """
445 if (start, stop) in self:
--> 446 self.wait(insist=stop)
447 local_start = start - self._start
448 local_stop = stop - self._start
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/chunk.py:388, in Chunk.wait(self, insist)
377 """
378 Args:
379 insist (bool or int): If True, raise an OSError if ``raw_data`` does
(...)
385 :ref:`uproot.source.chunk.Chunk.future` completes).
386 """
387 if self._raw_data is None:
--> 388 self._raw_data = numpy.frombuffer(self._future.result(), dtype=self._dtype)
389 if insist is True:
390 requirement = len(self._raw_data) == self._stop - self._start
File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/coalesce.py:36, in SliceFuture.result(self, timeout)
35 def result(self, timeout=None):
---> 36 return self._parent.result(timeout=timeout)[self._s]
TypeError: 'ClientOSError' object is not subscriptable