pyiron_base
Issue: storing numpy arrays containing only strings in HDF fails
Minimal working example (MWE):
from pyiron_base import Project
import numpy as np
pr = Project('dummy')
hdf = pr.create_hdf(pr.path + 'any', 'any')
hdf['key'] = np.array(['list', 'of', 'str'])
Running this results in the following traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-1-c0cd380030f9> in <module>
3 pr = Project('dummy')
4 hdf = pr.create_hdf(pr.path + 'any', 'any')
----> 5 hdf['key'] = np.array(['list', 'of', 'str'])
/mnt/c/Users/Siemer/pyiron_git/pyiron_base/pyiron_base/generic/hdfio.py in __setitem__(self, key, value)
258 elif isinstance(value, tuple):
259 value = list(value)
--> 260 h5io.write_hdf5(
261 self.file_name,
262 value,
~/anaconda3/envs/pyiron_git/lib/python3.8/site-packages/h5io/_h5io.py in write_hdf5(fname, data, overwrite, compression, title, slash, use_json)
109 del fid[title]
110 cleanup_data = []
--> 111 _triage_write(title, data, fid, comp_kw, str(type(data)),
112 cleanup_data, slash=slash, title=title,
113 use_json=use_json)
~/anaconda3/envs/pyiron_git/lib/python3.8/site-packages/h5io/_h5io.py in _triage_write(key, value, root, comp_kw, where, cleanup_data, slash, title, use_json)
186 if not (value.dtype == np.dtype('object') and
187 len(set([sub.dtype for sub in value])) == 1):
--> 188 _create_titled_dataset(root, key, 'ndarray', value)
189 else:
190 ma_index, ma_data = multiarray_dump(value)
~/anaconda3/envs/pyiron_git/lib/python3.8/site-packages/h5io/_h5io.py in _create_titled_dataset(root, key, title, data, comp_kw)
46 """Helper to create a titled dataset in h5py"""
47 comp_kw = {} if comp_kw is None else comp_kw
---> 48 out = root.create_dataset(key, data=data, **comp_kw)
49 out.attrs['TITLE'] = title
50 return out
~/anaconda3/envs/pyiron_git/lib/python3.8/site-packages/h5py/_hl/group.py in create_dataset(self, name, shape, dtype, data, **kwds)
146 group = self.require_group(parent_path)
147
--> 148 dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
149 dset = dataset.Dataset(dsid)
150 return dset
~/anaconda3/envs/pyiron_git/lib/python3.8/site-packages/h5py/_hl/dataset.py in make_new_dset(parent, shape, dtype, data, name, chunks, compression, shuffle, fletcher32, maxshape, compression_opts, fillvalue, scaleoffset, track_times, external, track_order, dcpl, allow_unknown_filter)
87 else:
88 dtype = numpy.dtype(dtype)
---> 89 tid = h5t.py_create(dtype, logical=1)
90
91 # Legacy
h5py/h5t.pyx in h5py.h5t.py_create()
h5py/h5t.pyx in h5py.h5t.py_create()
h5py/h5t.pyx in h5py.h5t.py_create()
TypeError: No conversion path for dtype: dtype('<U4')
HDF5 expects byte strings (numpy dtype 'S') rather than unicode strings (dtype 'U'). I have a work-around for this in FlattenedStorage (see the linked code). Wouldn't it be reasonable to move this solution upstream into FileHDFio itself?