dataprep
dataprep copied to clipboard
`create_report` does not handle a DataFrame that contains only NaN values
Describe the bug
create_report(pd.DataFrame([[np.nan], [np.nan]], columns=["a"]))
This call throws a `ValueError`:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-95-62261acfa27b> in <module>
----> 1 create_report(pd.DataFrame([[np.nan], [np.nan]], columns=["a"]))
~/projects/dataprep/dataprep/eda/create_report/__init__.py in create_report(df, title, mode, progress)
52 "resources": INLINE.render(),
53 "title": title,
---> 54 "components": format_report(df, mode, progress),
55 }
56 template_base = ENV_LOADER.get_template("base.html")
~/projects/dataprep/dataprep/eda/create_report/formatter.py in format_report(df, mode, progress)
53 df = to_dask(df)
54 if mode == "basic":
---> 55 comps = format_basic(df)
56 # elif mode == "full":
57 # comps = format_full(df)
~/projects/dataprep/dataprep/eda/create_report/formatter.py in format_basic(df)
88 category=RuntimeWarning,
89 )
---> 90 (data,) = dask.compute(data)
91
92 # results dictionary
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
450 postcomputes.append(x.__dask_postcompute__())
451
--> 452 results = schedule(dsk, keys, **kwargs)
453 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
454
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in <genexpr>(.0)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/array/percentile.py in merge_percentiles(finalq, qs, vals, interpolation, Ns)
190 L = list(zip(*[(q, val, N) for q, val, N in zip(qs, vals, Ns) if N]))
191 if not L:
--> 192 raise ValueError("No non-trivial arrays found")
193 qs, vals, Ns = L
194
ValueError: No non-trivial arrays found
This is possibly an upstream bug in dask: https://github.com/dask/dask/issues/2792