`test_compute_per_key` flaky
Ubuntu, 3.8: https://github.com/dask/distributed/pull/4925/checks?check_run_id=2888102571#step:10:2318
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = <Client: No scheduler connected>
s = <Scheduler: "tcp://127.0.0.1:52494" workers: 0 cores: 0, tasks: 0>
a = <Worker: 'tcp://127.0.0.1:52495', 0, Status.closed, stored: 0, running: 0/1, ready: 0, comm: 0, waiting: -4>
b = <Worker: 'tcp://127.0.0.1:52497', 1, Status.closed, stored: 0, running: 0/2, ready: 0, comm: 0, waiting: -2>
    @pytest.mark.flaky(reruns=10, reruns_delay=5, condition=MACOS)
    @gen_cluster(client=True, scheduler_kwargs={"dashboard": True})
    async def test_compute_per_key(c, s, a, b):
        mbk = ComputePerKey(s)
        da = pytest.importorskip("dask.array")
        x = (da.ones((20, 20), chunks=(10, 10)) + 1).persist(optimize_graph=False)
        await x
        y = await dask.delayed(inc)(1).persist()
        z = (x + x.T) - x.mean(axis=0)
        await c.compute(z.sum())
        mbk.update()
        http_client = AsyncHTTPClient()
        response = await http_client.fetch(
            "http://localhost:%d/individual-compute-time-per-key" % s.http_server.port
        )
        assert response.code == 200
>       assert ("sum-aggregate") in mbk.compute_source.data["names"]
E       AssertionError: assert 'sum-aggregate' in ['mean_chunk', 'sum', 'ones', 'add', 'mean_agg-aggregate']
distributed\dashboard\tests\test_scheduler_bokeh.py:808: AssertionError
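The assertion hard-codes the key name `sum-aggregate`, while the names actually recorded in this run only include `sum`, so the exact name of the reduced key appears to depend on the dask version and graph optimization in play. A minimal sketch of a more tolerant check (an illustration only, not the fix adopted in the test) would match on the key prefix instead:

```python
# Sketch only: accept any recorded key whose name starts with "sum",
# since the aggregate key may show up as "sum" or "sum-aggregate"
# depending on the dask version / graph optimization.
names = mbk.compute_source.data["names"]
assert any(name.startswith("sum") for name in names), names
```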
Haven't seen a failure in the past 30 days. Closing.
This happened again on Windows (Python 3.10) on 07/13:
____________________________ test_compute_per_key _____________________________
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:54512', workers: 0, cores: 0, tasks: 0>
a = <Worker 'tcp://127.0.0.1:54513', name: 0, status: closed, stored: 0, running: 0/1, ready: 0, comm: 0, waiting: 0>
b = <Worker 'tcp://127.0.0.1:54515', name: 1, status: closed, stored: 0, running: 0/2, ready: 0, comm: 0, waiting: 0>
    @gen_cluster(client=True, scheduler_kwargs={"dashboard": True})
    async def test_compute_per_key(c, s, a, b):
        mbk = ComputePerKey(s)
        da = pytest.importorskip("dask.array")
        x = (da.ones((20, 20), chunks=(10, 10)) + 1).persist(optimize_graph=False)
        await x
        y = await dask.delayed(inc)(1).persist()
        z = (x + x.T) - x.mean(axis=0)
        await c.compute(z.sum())
        mbk.update()
        http_client = AsyncHTTPClient()
        response = await http_client.fetch(
            "http://localhost:%d/individual-compute-time-per-key" % s.http_server.port
        )
        assert response.code == 200
>       assert ("sum-aggregate") in mbk.compute_source.data["names"]
E       AssertionError: assert 'sum-aggregate' in ['ones_like', 'sum', 'mean_chunk', 'add', 'mean_agg-aggregate']
distributed\dashboard\tests\test_scheduler_bokeh.py:1022: AssertionError
---------------------------- Captured stdout call -----------------------------
Dumped cluster state to test_cluster_dump\test_compute_per_key.yaml
---------------------------- Captured stderr call -----------------------------
2022-07-13 10:35:41,533 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:54515'.
2022-07-13 10:35:41,535 - distributed.worker - ERROR - Scheduler was unaware of this worker 'tcp://127.0.0.1:54515'. Shutting down.
https://github.com/dask/distributed/runs/7318976477?check_suite_focus=true#step:11:1840
The `Received heartbeat from unregistered worker` warning is suspicious and is probably related to the issue.
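If the Windows failure keeps recurring while the unregistered-worker race is investigated, one low-effort mitigation would be to widen the existing rerun condition. This is a sketch under the assumption that the test still carries the pytest-rerunfailures marker shown in the first traceback; the `WINDOWS` constant here is defined inline for illustration:

```python
import sys

import pytest
from distributed.utils_test import gen_cluster

MACOS = sys.platform == "darwin"
WINDOWS = sys.platform.startswith("win")  # defined here only for this sketch

# Same rerun settings as the existing marker, but also applied on Windows,
# where the 2022-07-13 failure was observed.
@pytest.mark.flaky(reruns=10, reruns_delay=5, condition=MACOS or WINDOWS)
@gen_cluster(client=True, scheduler_kwargs={"dashboard": True})
async def test_compute_per_key(c, s, a, b):
    ...  # unchanged test body
```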