uproot5

`ClientOSError` during repeat file read with `uproot.dask`

Open alexander-held opened this issue 6 months ago • 0 comments

While debugging, I ran into some surprising crashes. I was processing the same file multiple times, in a setup that can be reduced to the following:

import awkward as ak
import uproot

prefix = "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD/TT_TuneCUETP8M1_13TeV-powheg-pythia8/"
fname = prefix + "cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1_00000_0000.root"

for fname_ in [fname, fname]:
    print(f"reading {fname_}")
    evts = uproot.dask({fname_: "Events"}, steps_per_file=1)
    ak.sum(evts.event).compute()

The first loop iteration is fine, but the second one results in a crash:

TypeError: 'ClientOSError' object is not subscriptable

The error is not reproducible with local files for me.

I am using uproot version 5.3.10.

Full trace:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[4], line 10
      8 print(f"reading {fname}")
      9 evts = uproot.dask({fname_: "Events"}, steps_per_file=1)
---> 10 ak.sum(evts.event).compute()

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/dask/base.py:375, in DaskMethodsMixin.compute(self, **kwargs)
    351 def compute(self, **kwargs):
    352     """Compute this dask collection
    353 
    354     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    373     dask.compute
    374     """
--> 375     (result,) = compute(self, traverse=False, **kwargs)
    376     return result

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/dask/base.py:661, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    658     postcomputes.append(x.__dask_postcompute__())
    660 with shorten_traceback():
--> 661     results = schedule(dsk, keys, **kwargs)
    663 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:1227, in _UprootRead.__call__(self, i_start_stop)
   1215     except self.allowed_exceptions as err:
   1216         return (
   1217             self.mock_empty(backend="cpu"),
   1218             _report_failure(
   (...)
   1224             ),
   1225         )
-> 1227 result, _ = self._call_impl(i, start, stop)
   1228 return result

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:1231, in _UprootRead._call_impl(self, i, start, stop)
   1230 def _call_impl(self, i, start, stop):
-> 1231     return self.read_tree(
   1232         self.ttrees[i],
   1233         start,
   1234         stop,
   1235     )

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:985, in UprootReadMixin.read_tree(self, tree, start, stop)
    980 nplike = Numpy.instance()
    982 # The remap implementation should correctly populate the generated
    983 # buffer mapping in __call__, such that the high-level form can be
    984 # used in `from_buffers`
--> 985 mapping = self.form_mapping_info.load_buffers(
    986     tree,
    987     self.common_keys,
    988     start,
    989     stop,
    990     self.decompression_executor,
    991     self.interpretation_executor,
    992     self.interp_options,
    993 )
    995 # Populate container with placeholders if keys aren't required
    996 # Otherwise, read from disk
    997 container = {}

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/_dask.py:908, in TrivialFormMappingInfo.load_buffers(self, tree, keys, start, stop, decompression_executor, interpretation_executor, options)
    897 def load_buffers(
    898     self,
    899     tree: HasBranches,
   (...)
    906 ) -> Mapping[str, AwkArray]:
    907     # First, let's read the arrays as a tuple (to associate with each key)
--> 908     arrays = tree.arrays(
    909         keys,
    910         entry_start=start,
    911         entry_stop=stop,
    912         ak_add_doc=options["ak_add_doc"],
    913         decompression_executor=decompression_executor,
    914         interpretation_executor=interpretation_executor,
    915         how=tuple,
    916     )
    918     awkward = uproot.extras.awkward()
    920     # The subform generated by awkward.to_buffers() has different form keys
    921     # from those used to perform buffer projection. However, the subform
    922     # structure should be identical to the projection optimisation
    923     # subform, as they're derived from `branch.interpretation.awkward_form`
    924     # Therefore, we can correlate the subform keys using `expected_from_buffers`

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:823, in HasBranches.arrays(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, decompression_executor, interpretation_executor, array_cache, library, ak_add_doc, how)
    820                 ranges_or_baskets.append((branch, basket_num, range_or_basket))
    822 interp_options = {"ak_add_doc": ak_add_doc}
--> 823 _ranges_or_baskets_to_arrays(
    824     self,
    825     ranges_or_baskets,
    826     branchid_interpretation,
    827     entry_start,
    828     entry_stop,
    829     decompression_executor,
    830     interpretation_executor,
    831     library,
    832     arrays,
    833     False,
    834     interp_options,
    835 )
    837 # no longer needed; save memory
    838 del ranges_or_baskets

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:3105, in _ranges_or_baskets_to_arrays(hasbranches, ranges_or_baskets, branchid_interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, library, arrays, update_ranges_or_baskets, interp_options)
   3102     pass
   3104 elif isinstance(obj, tuple) and len(obj) == 3:
-> 3105     uproot.source.futures.delayed_raise(*obj)
   3107 else:
   3108     raise AssertionError(obj)

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/futures.py:38, in delayed_raise(exception_class, exception_value, traceback)
     34 def delayed_raise(exception_class, exception_value, traceback):
     35     """
     36     Raise an exception from a background thread on the main thread.
     37     """
---> 38     raise exception_value.with_traceback(traceback)

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/behaviors/TBranch.py:3026, in _ranges_or_baskets_to_arrays.<locals>.chunk_to_basket(chunk, branch, basket_num)
   3024 try:
   3025     cursor = uproot.source.cursor.Cursor(chunk.start)
-> 3026     basket = uproot.models.TBasket.Model_TBasket.read(
   3027         chunk,
   3028         cursor,
   3029         {"basket_num": basket_num},
   3030         hasbranches._file,
   3031         hasbranches._file,
   3032         branch,
   3033     )
   3034     original_index = range_original_index[(chunk.start, chunk.stop)]
   3035     if update_ranges_or_baskets:

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/model.py:854, in Model.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
    852     forth_obj.add_node(forth_stash)
    853     forth_obj.push_active_node(forth_stash)
--> 854 self.read_members(chunk, cursor, context, file)
    855 if forth_obj is not None:
    856     forth_obj.pop_active_node()

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/models/TBasket.py:227, in Model_TBasket.read_members(self, chunk, cursor, context, file)
    217 assert isinstance(self._parent, uproot.behaviors.TBranch.TBranch)
    218 self._basket_num = context.get("basket_num")
    220 (
    221     self._members["fNbytes"],
    222     self._key_version,
    223     self._members["fObjlen"],
    224     self._members["fDatime"],
    225     self._members["fKeylen"],
    226     self._members["fCycle"],
--> 227 ) = cursor.fields(chunk, _tbasket_format1, context)
    229 # skip the class name, name, and title
    230 cursor.move_to(
    231     self._cursor.index + self._members["fKeylen"] - _tbasket_format2.size - 1
    232 )

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/cursor.py:201, in Cursor.fields(self, chunk, format, context, move)
    199 if move:
    200     self._index = stop
--> 201 return format.unpack(chunk.get(start, stop, self, context))

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/chunk.py:446, in Chunk.get(self, start, stop, cursor, context)
    424 """
    425 Args:
    426     start (int): Seek position of the first byte to include.
   (...)
    443 already.
    444 """
    445 if (start, stop) in self:
--> 446     self.wait(insist=stop)
    447     local_start = start - self._start
    448     local_stop = stop - self._start

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/chunk.py:388, in Chunk.wait(self, insist)
    377 """
    378 Args:
    379     insist (bool or int): If True, raise an OSError if ``raw_data`` does
   (...)
    385 :ref:`uproot.source.chunk.Chunk.future` completes).
    386 """
    387 if self._raw_data is None:
--> 388     self._raw_data = numpy.frombuffer(self._future.result(), dtype=self._dtype)
    389     if insist is True:
    390         requirement = len(self._raw_data) == self._stop - self._start

File ~/mambaforge/envs/coffea-2024/lib/python3.11/site-packages/uproot/source/coalesce.py:36, in SliceFuture.result(self, timeout)
     35 def result(self, timeout=None):
---> 36     return self._parent.result(timeout=timeout)[self._s]

TypeError: 'ClientOSError' object is not subscriptable

alexander-held avatar Aug 15 '24 15:08 alexander-held