pmda
pmda copied to clipboard
distributed.client error with atomgroup or universe type self attribute
Distributed
An error is raised when an atomgroup or universe is stored as a self attribute and the analysis is run with the distributed.client dask scheduler.
One example is rdf.py (PR #70), which contained the line
self.ags = ags
It raised the following error:
______________________ test_same_result[distributed-2-1] _______________________
u = <Universe with 43480 atoms>
sels = [[<AtomGroup with 1 atom>, <AtomGroup with 2 atoms>], [<AtomGroup with 2 atoms>, <AtomGroup with 2 atoms>]]
n_blocks = 1, scheduler = None
@pytest.mark.parametrize("n_blocks", [1, 2, 3, 4])
def test_same_result(u, sels, n_blocks, scheduler):
# should see same results from analysis.rdf.InterRDF_s
# and pmda.rdf.InterRDF_s
nrdf = rdf.InterRDF_s(u, sels).run()
> prdf = InterRDF_s(u, sels).run(n_blocks=n_blocks)
/home/shujie/pmda/pmda/test/test_rdf_s.py:109:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/shujie/pmda/pmda/parallel.py:359: in run
res = blocks.compute(**scheduler_kwargs)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/dask/base.py:156: in compute
(result,) = compute(self, traverse=False, **kwargs)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/dask/base.py:395: in compute
results = schedule(dsk, keys, **kwargs)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/client.py:2230: in get
direct=direct)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/client.py:1593: in gather
asynchronous=asynchronous)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/client.py:647: in sync
return sync(self.loop, func, *args, **kwargs)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/utils.py:277: in sync
six.reraise(*error[0])
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/six.py:693: in reraise
raise value
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/utils.py:262: in f
result[0] = yield future
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py:1099: in run
value = future.result()
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py:1107: in run
yielded = self.gen.throw(*exc_info)
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/client.py:1469: in _gather
traceback)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tp = <class 'distributed.scheduler.KilledWorker'>, value = None, tb = None
def reraise(tp, value, tb=None):
try:
if value is None:
value = tp()
if value.__traceback__ is not tb:
raise value.with_traceback(tb)
> raise value
E distributed.scheduler.KilledWorker: ('_dask_helper-0b781621-ac0a-4bda-b691-1507e7587020', 'tcp://127.0.0.1:36055')
/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/six.py:693: KilledWorker
----------------------------- Captured stderr call -----------------------------
distributed.protocol.core - CRITICAL - Failed to deserialize
Traceback (most recent call last):
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/MDAnalysis-0.19.1.dev0-py3.6-linux-x86_64.egg/MDAnalysis/core/groups.py", line 119, in _unpickle
u = _ANCHOR_UNIVERSES[uhash]
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/weakref.py", line 137, in __getitem__
o = self.data[key]()
KeyError: UUID('38eaec10-83dd-4648-aca7-5e16ce9f7196')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/protocol/core.py", line 131, in loads
value = _deserialize(head, fs, deserializers=deserializers)
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 178, in deserialize
return loads(header, frames)
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 57, in pickle_loads
return pickle.loads(b''.join(frames))
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads
return pickle.loads(x)
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/MDAnalysis-0.19.1.dev0-py3.6-linux-x86_64.egg/MDAnalysis/core/groups.py", line 127, in _unpickle
for k in _ANCHOR_UNIVERSES.keys()])))
RuntimeError: Couldn't find a suitable Universe to unpickle AtomGroup onto with Universe hash '38eaec10-83dd-4648-aca7-5e16ce9f7196'. Available hashes:
distributed.core - ERROR - Couldn't find a suitable Universe to unpickle AtomGroup onto with Universe hash '38eaec10-83dd-4648-aca7-5e16ce9f7196'. Available hashes:
Traceback (most recent call last):
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/site-packages/MDAnalysis-0.19.1.dev0-py3.6-linux-x86_64.egg/MDAnalysis/core/groups.py", line 119, in _unpickle
u = _ANCHOR_UNIVERSES[uhash]
File "/home/shujie/anaconda3/envs/py36/lib/python3.6/weakref.py", line 137, in __getitem__
o = self.data[key]()
KeyError: UUID('38eaec10-83dd-4648-aca7-5e16ce9f7196')
As mentioned in issue #76, the current solution is to avoid setting an atomgroup or universe as a self attribute.
version of dask
dask 0.19.4
See also PR #65