datachain
datachain copied to clipboard
Returning None for an Optional[File] output fails during UDF-to-warehouse type adjustment
import datachain as dc
from datachain import File
from typing import Tuple, Optional
def try_map(file: File) -> Tuple[Optional[File], int, str]:
    """Return a constant result whose ``Optional[File]`` slot is ``None``.

    Minimal reproduction UDF: the input ``file`` is ignored, and the ``None``
    in the first position is the value that triggers the reported failure.

    Args:
        file: The input file; unused.

    Returns:
        The tuple ``(None, 0, "ok")``.
    """
    return None, 0, "ok"
# Run try_map over at most 10 entries read from storage and display the result.
limited_chain = dc.read_storage("<bucket>").limit(10)
output_spec = {"eeg_data": File, "sampling_frequency": int, "error": str}
mapped_chain = limited_chain.map(try_map, output=output_spec)
mapped_chain.show(10)
Fails with:
File "/Users/ivan/Projects/dvcx/src/datachain/lib/dc/datachain.py", line 988, in collect_flatten
with self._query.ordered_select(*db_signals).as_iterable() as rows:
File "/opt/homebrew/Cellar/python@3.11/3.11.9_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/contextlib.py", line 137, in __enter__
return next(self.gen)
^^^^^^^^^^^^^^
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 1305, in as_iterable
query = self.apply_steps().select()
^^^^^^^^^^^^^^^^^^
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 1251, in apply_steps
result = step.apply(
^^^^^^^^^^^
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 614, in apply
self.populate_udf_table(udf_table, query)
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 532, in populate_udf_table
process_udf_outputs(
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 345, in process_udf_outputs
rows.append(adjust_outputs(warehouse, row, udf_col_types))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ivan/Projects/dvcx/src/datachain/query/dataset.py", line 300, in adjust_outputs
row[col_name] = warehouse.convert_type(
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ivan/Projects/dvcx/src/datachain/data_storage/warehouse.py", line 150, in convert_type
raise ve
ValueError: Value 0 with type <class 'int'> incompatible for column type String
A workaround is to return File(path="") instead of None.