mars
mars copied to clipboard
[BUG] Series.drop_duplicates raised a `TypeError`
Describe the bug
Failed to execute `Series.drop_duplicates`.
In [75]: a = md.DataFrame(np.random.rand(10, 2), columns=['a', 'b'], chunk_size=2)
In [76]: a['a'].drop_duplicates().execute()
0%| | 0/100 [00:00<?, ?it/s]Failed to run subtask l8o2G1V5iJMZVFK7USec2C0k on band numa-0
Traceback (most recent call last):
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 263, in internal_run_subtask
subtask, band_name, subtask_api, batch_quota_req)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 340, in _retry_run_subtask
return await _retry_run(subtask, subtask_info, _run_subtask_once)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 83, in _retry_run
raise ex
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 67, in _retry_run
return await target_async_func(*args)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 301, in _run_subtask_once
return await asyncio.shield(aiotask)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/api.py", line 59, in run_subtask_in_slot
return await ref.run_subtask(subtask)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 154, in send
return self._process_result_message(result)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 59, in _process_result_message
raise message.error.with_traceback(message.traceback)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/pool.py", line 496, in send
result = await future
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/api.py", line 118, in __on_receive__
return await super().__on_receive__(message)
File "mars/oscar/core.pyx", line 351, in __on_receive__
raise ex
File "mars/oscar/core.pyx", line 345, in mars.oscar.core._BaseActor.__on_receive__
return await self._handle_actor_result(result)
File "mars/oscar/core.pyx", line 250, in _handle_actor_result
result = list(dones)[0].result()
File "mars/oscar/core.pyx", line 273, in mars.oscar.core._BaseActor._run_actor_async_generator
with debug_async_timeout('actor_lock_timeout',
File "mars/oscar/core.pyx", line 275, in mars.oscar.core._BaseActor._run_actor_async_generator
async with self._lock:
File "mars/oscar/core.pyx", line 279, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await gen.athrow(*res)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/runner.py", line 104, in run_subtask
result = yield self._running_processor.run(subtask)
File "mars/oscar/core.pyx", line 284, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await self._handle_actor_result(res)
File "mars/oscar/core.pyx", line 219, in _handle_actor_result
result = await result
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 154, in send
return self._process_result_message(result)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 59, in _process_result_message
raise message.error.with_traceback(message.traceback)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/pool.py", line 496, in send
result = await future
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/api.py", line 118, in __on_receive__
return await super().__on_receive__(message)
File "mars/oscar/core.pyx", line 351, in __on_receive__
raise ex
File "mars/oscar/core.pyx", line 345, in mars.oscar.core._BaseActor.__on_receive__
return await self._handle_actor_result(result)
File "mars/oscar/core.pyx", line 250, in _handle_actor_result
result = list(dones)[0].result()
File "mars/oscar/core.pyx", line 273, in mars.oscar.core._BaseActor._run_actor_async_generator
with debug_async_timeout('actor_lock_timeout',
File "mars/oscar/core.pyx", line 275, in mars.oscar.core._BaseActor._run_actor_async_generator
async with self._lock:
File "mars/oscar/core.pyx", line 279, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await gen.athrow(*res)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 482, in run
result = yield self._running_aio_task
File "mars/oscar/core.pyx", line 284, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await self._handle_actor_result(res)
File "mars/oscar/core.pyx", line 219, in _handle_actor_result
result = await result
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 374, in run
stored_keys, store_sizes, memory_sizes, data_key_to_object_id = await self._store_data(chunk_graph)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 248, in _store_data
result_chunk.params = result_chunk.get_params_from_data(result_data)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/dataframe/core.py", line 1443, in get_params_from_data
value=data.dtypes)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/dataframe/core.py", line 355, in __init__
super().__init__(_key=key, _value=value, **kw)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/serialization/serializables/core.py", line 67, in __init__
object.__setattr__(self, key, val)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/serialization/serializables/field.py", line 106, in __set__
raise type(e)(f'Failed to set `{self._attr_name}`: {str(e)}')
TypeError: Failed to set `_value`: value needs to be instance of (<class 'pandas.core.series.Series'>,), got <class 'numpy.dtype[float64]'>
Subtask l8o2G1V5iJMZVFK7USec2C0k errored
Traceback (most recent call last):
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 263, in internal_run_subtask
subtask, band_name, subtask_api, batch_quota_req)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 340, in _retry_run_subtask
return await _retry_run(subtask, subtask_info, _run_subtask_once)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 83, in _retry_run
raise ex
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 67, in _retry_run
return await target_async_func(*args)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/scheduling/worker/execution.py", line 301, in _run_subtask_once
return await asyncio.shield(aiotask)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/api.py", line 59, in run_subtask_in_slot
return await ref.run_subtask(subtask)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 154, in send
return self._process_result_message(result)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 59, in _process_result_message
raise message.error.with_traceback(message.traceback)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/pool.py", line 496, in send
result = await future
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/api.py", line 118, in __on_receive__
return await super().__on_receive__(message)
File "mars/oscar/core.pyx", line 351, in __on_receive__
raise ex
File "mars/oscar/core.pyx", line 345, in mars.oscar.core._BaseActor.__on_receive__
return await self._handle_actor_result(result)
File "mars/oscar/core.pyx", line 250, in _handle_actor_result
result = list(dones)[0].result()
File "mars/oscar/core.pyx", line 273, in mars.oscar.core._BaseActor._run_actor_async_generator
with debug_async_timeout('actor_lock_timeout',
File "mars/oscar/core.pyx", line 275, in mars.oscar.core._BaseActor._run_actor_async_generator
async with self._lock:
File "mars/oscar/core.pyx", line 279, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await gen.athrow(*res)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/runner.py", line 104, in run_subtask
result = yield self._running_processor.run(subtask)
File "mars/oscar/core.pyx", line 284, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await self._handle_actor_result(res)
File "mars/oscar/core.pyx", line 219, in _handle_actor_result
result = await result
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 154, in send
return self._process_result_message(result)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/context.py", line 59, in _process_result_message
raise message.error.with_traceback(message.traceback)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/backends/pool.py", line 496, in send
result = await future
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/oscar/api.py", line 118, in __on_receive__
return await super().__on_receive__(message)
File "mars/oscar/core.pyx", line 351, in __on_receive__
raise ex
File "mars/oscar/core.pyx", line 345, in mars.oscar.core._BaseActor.__on_receive__
return await self._handle_actor_result(result)
File "mars/oscar/core.pyx", line 250, in _handle_actor_result
result = list(dones)[0].result()
File "mars/oscar/core.pyx", line 273, in mars.oscar.core._BaseActor._run_actor_async_generator
with debug_async_timeout('actor_lock_timeout',
File "mars/oscar/core.pyx", line 275, in mars.oscar.core._BaseActor._run_actor_async_generator
async with self._lock:
File "mars/oscar/core.pyx", line 279, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await gen.athrow(*res)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 482, in run
result = yield self._running_aio_task
File "mars/oscar/core.pyx", line 284, in mars.oscar.core._BaseActor._run_actor_async_generator
res = await self._handle_actor_result(res)
File "mars/oscar/core.pyx", line 219, in _handle_actor_result
result = await result
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 374, in run
stored_keys, store_sizes, memory_sizes, data_key_to_object_id = await self._store_data(chunk_graph)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/services/subtask/worker/processor.py", line 248, in _store_data
result_chunk.params = result_chunk.get_params_from_data(result_data)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/dataframe/core.py", line 1443, in get_params_from_data
value=data.dtypes)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/dataframe/core.py", line 355, in __init__
super().__init__(_key=key, _value=value, **kw)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/serialization/serializables/core.py", line 67, in __init__
object.__setattr__(self, key, val)
File "/Users/hekaisheng/Documents/mars_dev/mars/mars/serialization/serializables/field.py", line 106, in __set__
raise type(e)(f'Failed to set `{self._attr_name}`: {str(e)}')
TypeError: Failed to set `_value`: value needs to be instance of (<class 'pandas.core.series.Series'>,), got <class 'numpy.dtype[float64]'>
The output type is wrong in the tile of `DataFrameDropDuplicates`; here is the related code:
https://github.com/mars-project/mars/blob/86bbdd0e63e04fa278c240b5398bb895310c84c1/mars/dataframe/base/_duplicate.py#L151-L155
For series input, output type should always be series.
Hello! I am a beginner to open source. I would like to contribute to this issue. Could you please explain this issue to me?
Can you let me help?