Irreducible unions when reading from parquet file via `ak.from_parquet`
Version of Awkward Array
2.8.10
Description and code to reproduce
I am getting `TypeError: irreducible unions (different types at the same level in an array) can't be used as slices` when attempting to apply a boolean mask to a concatenated array whose input arrays were read from a parquet file via `ak.from_parquet`. The same code runs without issue when I read the parquet file with `read_table` from `pyarrow.parquet`.
This code reproduces the issue:
wget http://uaf-10.t2.ucsd.edu/~kmohrman/public_html_backup/files/parquet_files/100k_from_lindsey_file/test_pq_100k.parquet
import awkward as ak
import pyarrow.parquet as pq
from coffea.nanoevents.methods import candidate
def get_table(filepath, column_lst, read_method):
    """Read the requested columns of a parquet file with the chosen reader.

    Args:
        filepath: Path to the parquet file.
        column_lst: Names of the columns to load.
        read_method: ``"pyarrow_parquet_read_table"`` to read with
            ``pq.read_table``, or ``"ak_from_parquet"`` to read with
            ``ak.from_parquet``.

    Returns:
        Whatever the selected reader returns for the requested columns.

    Raises:
        ValueError: If ``read_method`` is not one of the two supported names.
    """
    # Works: the downstream mask selection succeeds with this reader.
    if read_method == "pyarrow_parquet_read_table":
        return pq.read_table(filepath, columns=column_lst)
    # Does not work: the downstream mask selection raises the
    # "irreducible unions" TypeError with this reader.
    if read_method == "ak_from_parquet":
        return ak.from_parquet(filepath, columns=column_lst)
    # An unrecognised name used to fall through to `return table` with `table`
    # never assigned; fail with a message that names the real problem instead.
    raise ValueError(
        f"unknown read_method {read_method!r}; expected "
        "'pyarrow_parquet_read_table' or 'ak_from_parquet'"
    )
# Run the same lepton selection once per reader so the two outcomes can be
# compared side by side.
for read_method in ("pyarrow_parquet_read_table", "ak_from_parquet"):
    filepath = "test_pq_100k.parquet"
    column_lst = [
        "Muon_pt",
        "Muon_eta",
        "Muon_phi",
        "Muon_mass",
        "Muon_charge",
        "Electron_pt",
        "Electron_eta",
        "Electron_phi",
        "Electron_mass",
        "Electron_charge",
    ]
    table = get_table(filepath, column_lst, read_method=read_method)

    # Build one candidate record array per lepton flavour straight from the
    # columns of the table that was just read.
    Electron = ak.zip(
        {
            "pt": table["Electron_pt"],
            "eta": table["Electron_eta"],
            "phi": table["Electron_phi"],
            "mass": table["Electron_mass"],
            "charge": table["Electron_charge"],
        },
        with_name="PtEtaPhiMCandidate",
        behavior=candidate.behavior,
    )
    Muon = ak.zip(
        {
            "pt": table["Muon_pt"],
            "eta": table["Muon_eta"],
            "phi": table["Muon_phi"],
            "mass": table["Muon_mass"],
            "charge": table["Muon_charge"],
        },
        with_name="PtEtaPhiMCandidate",
        behavior=candidate.behavior,
    )

    # Concatenate electrons and muons along axis=1, then put the record name
    # back on the combined array.
    combined = ak.concatenate([Electron, Muon], axis=1)
    leptons = ak.with_name(combined, "PtEtaPhiMCandidate")

    print("\nRead method:", read_method)
    print("(leptons.pt>10).layout\n", (leptons.pt > 10).layout)
    # This is the line reported to raise the "irreducible unions" TypeError
    # when the table came from ak.from_parquet.
    leptons_gr10 = leptons[leptons.pt > 10]
    print("Pt of leptons with pt greater than 10:", leptons_gr10.pt)
For the `ak.from_parquet` case, we hit the error shown in [1] on the `leptons[leptons.pt>10]` line. One difference seems to be that, in the `ak.from_parquet` case, the layout of the `leptons.pt>10` mask is a `UnionArray` (while for the `pq.read_table` case it is not), which is potentially unexpected.
[1]
Traceback (most recent call last):
File "/home/k.mohrman/coffea_dir/gpu_studies/columnar_gpu/tmp/mre.py", line 43, in <module>
leptons_gr10 = leptons[leptons.pt>10]
~~~~~~~^^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/highlevel.py", line 1104, in __getitem__
with ak._errors.SlicingErrorContext(self, where):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/_errors.py", line 80, in __exit__
raise self.decorate_exception(exception_type, exception_value)
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/highlevel.py", line 1112, in __getitem__
indexed_layout = prepare_layout(self._layout._getitem(where, NamedAxis))
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/contents/content.py", line 659, in _getitem
return self._getitem(where.layout, named_axis)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/contents/content.py", line 739, in _getitem
return self._getitem((where,), named_axis)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/contents/content.py", line 584, in _getitem
items = ak._slicing.normalise_items(where, backend)
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/_slicing.py", line 306, in normalise_items
return [normalise_item(x, backend=backend) for x in where]
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/_slicing.py", line 247, in normalise_item
out = _normalise_item_bool_to_int(_normalise_item_nested(item), backend)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/_slicing.py", line 353, in _normalise_item_nested
_normalise_item_nested(item.content),
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
File "/blue/p.chang/k.mohrman/dir_for_miniconda/miniconda3/envs/coffeagpu_env10/lib/python3.13/site-packages/awkward/_slicing.py", line 457, in _normalise_item_nested
raise TypeError(
"irreducible unions (different types at the same level in an array) can't be used as slices"
)
TypeError: irreducible unions (different types at the same level in an array) can't be used as slices
This error occurred while attempting to slice
<PtEtaPhiMCandidateArray [[{pt: 12.8, eta: -0.623, ...}], ...] type='10...'>
with
<Array [[True], [True], ..., [True, False]] type='100000 * var * union[bool...'>
Just curious, does ak.from_arrow(pq.read_table(...)) also work?