tcp icon indicating copy to clipboard operation
tcp copied to clipboard

`rtd_ray.upgrade_rtd()`

Open mariusdkm opened this issue 2 years ago • 0 comments

After calling rtd_ray.download_rtd() and then rtd_ray.upgrade_rtd() once,
every subsequent call to rtd_ray.upgrade_rtd() fails with the error below. I suspect that rtd_ray.upgrade_rtd() does something weird.

Function:  subgraph_callable-81d1293f-236b-4a65-a47d-08df5268
args:      ({'piece': ('/usr/src/app/cache/recent_change_rtd/_new/part.0.parquet', None, None)})
kwargs:    {}
Exception: "ValueError('Categorical categories cannot be null')"

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/usr/src/app/update_butler/__main__.py", line 28, in <module>
    rtd_ray.upgrade_rtd()
  File "/usr/src/app/helpers/RtdRay.py", line 294, in upgrade_rtd
    rtd = self.load_data()
  File "/usr/src/app/helpers/RtdRay.py", line 430, in load_data
    rtd[key] = rtd[key].cat.set_categories(rtd[key].head(1).cat.categories)
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/core.py", line 1098, in head
    return self._head(n=n, npartitions=npartitions, compute=compute, safe=safe)
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/core.py", line 1132, in _head
    result = result.compute()
  File "/opt/venv/lib/python3.9/site-packages/dask/base.py", line 288, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/opt/venv/lib/python3.9/site-packages/dask/base.py", line 570, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 2722, in get
    results = self.gather(packed, asynchronous=asynchronous, direct=direct)
  File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 1977, in gather
    return self.sync(
  File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 865, in sync
    return sync(
  File "/opt/venv/lib/python3.9/site-packages/distributed/utils.py", line 327, in sync
    raise exc.with_traceback(tb)
  File "/opt/venv/lib/python3.9/site-packages/distributed/utils.py", line 310, in f
    result[0] = yield future
  File "/opt/venv/lib/python3.9/site-packages/tornado/gen.py", line 762, in run
    value = future.result()
  File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 1842, in _gather
    raise exception.with_traceback(traceback)
  File "/opt/venv/lib/python3.9/site-packages/dask/optimization.py", line 969, in __call__
    return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
  File "/opt/venv/lib/python3.9/site-packages/dask/core.py", line 149, in get
    result = _execute_task(task, cache)
  File "/opt/venv/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
    return func(*(_execute_task(a, cache) for a in args))
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 87, in __call__
    return read_parquet_part(
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 422, in read_parquet_part
    dfs = [
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 423, in <listcomp>
    func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw))
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py", line 432, in read_partition
    df = cls._arrow_table_to_pandas(arrow_table, categories, **kwargs)
  File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py", line 1574, in _arrow_table_to_pandas
    return arrow_table.to_pandas(categories=categories, **_kwargs)
  File "pyarrow/array.pxi", line 766, in pyarrow.lib._PandasConvertible.to_pandas
  File "pyarrow/table.pxi", line 1815, in pyarrow.lib.Table._to_pandas
  File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 789, in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
  File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 1130, in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
  File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 1130, in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
  File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 728, in _reconstruct_block
    cat = _pandas_api.categorical_type.from_codes(
  File "/opt/venv/lib/python3.9/site-packages/pandas/core/arrays/categorical.py", line 672, in from_codes
    dtype = CategoricalDtype._from_values_or_dtype(
  File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 296, in _from_values_or_dtype
    dtype = CategoricalDtype(categories, ordered)
  File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 183, in __init__
    self._finalize(categories, ordered, fastpath=False)
  File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 337, in _finalize
    categories = self.validate_categories(categories, fastpath=fastpath)
  File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 537, in validate_categories
    raise ValueError("Categorical categories cannot be null")
ValueError: Categorical categories cannot be null

mariusdkm avatar Nov 07 '21 14:11 mariusdkm