dataprep icon indicating copy to clipboard operation
dataprep copied to clipboard

create_report does not handle an all-NaN DataFrame

Open dovahcrow opened this issue 4 years ago • 1 comment

Describe the bug

create_report(pd.DataFrame([[np.nan], [np.nan]], columns=["a"]))

This raises a ValueError:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-95-62261acfa27b> in <module>
----> 1 create_report(pd.DataFrame([[np.nan], [np.nan]], columns=["a"]))

~/projects/dataprep/dataprep/eda/create_report/__init__.py in create_report(df, title, mode, progress)
     52         "resources": INLINE.render(),
     53         "title": title,
---> 54         "components": format_report(df, mode, progress),
     55     }
     56     template_base = ENV_LOADER.get_template("base.html")

~/projects/dataprep/dataprep/eda/create_report/formatter.py in format_report(df, mode, progress)
     53         df = to_dask(df)
     54         if mode == "basic":
---> 55             comps = format_basic(df)
     56         # elif mode == "full":
     57         #     comps = format_full(df)

~/projects/dataprep/dataprep/eda/create_report/formatter.py in format_basic(df)
     88             category=RuntimeWarning,
     89         )
---> 90         (data,) = dask.compute(data)
     91 
     92     # results dictionary

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    450         postcomputes.append(x.__dask_postcompute__())
    451 
--> 452     results = schedule(dsk, keys, **kwargs)
    453     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    454 

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     82         get_id=_thread_get_id,
     83         pack_exception=pack_exception,
---> 84         **kwargs
     85     )
     86 

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317 
    318 

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in <genexpr>(.0)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~/projects/dataprep/.venv/lib/python3.7/site-packages/dask/array/percentile.py in merge_percentiles(finalq, qs, vals, interpolation, Ns)
    190     L = list(zip(*[(q, val, N) for q, val, N in zip(qs, vals, Ns) if N]))
    191     if not L:
--> 192         raise ValueError("No non-trivial arrays found")
    193     qs, vals, Ns = L
    194 

ValueError: No non-trivial arrays found

dovahcrow avatar Sep 09 '20 07:09 dovahcrow

It's possibly an upstream bug in dask: https://github.com/dask/dask/issues/2792

eutialia avatar Sep 11 '20 22:09 eutialia