Running tokenizer on dataset (num_proc=2): 0%| | 0/666 [00:00<?, ? examples/s]
[rank0]: Traceback (most recent call last):
[rank0]: File "/data/mnt/LMFlow-20240514/examples/finetune.py", line 61, in <module>
[rank0]: main()
[rank0]: File "/data/mnt/LMFlow-20240514/examples/finetune.py", line 57, in main
[rank0]: tuned_model = finetuner.tune(model=model, dataset=dataset)
[rank0]: File "/data/mnt/LMFlow-20240514/src/lmflow/pipeline/finetuner.py", line 237, in tune
[rank0]: tokenized_dataset = model.tokenize(dataset)
[rank0]: File "/data/mnt/LMFlow-20240514/src/lmflow/models/hf_decoder_model.py", line 622, in tokenize
[rank0]: tokenized_datasets = raw_datasets.map(
[rank0]: File "/data/mnt/LMFlow-20240514/src/lmflow/datasets/dataset.py", line 371, in map
[rank0]: mapped_backend_dataset = self.backend_dataset.map(*args, **kwargs)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 592, in wrapper
[rank0]: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 557, in wrapper
[rank0]: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3189, in map
[rank0]: for rank, done, content in iflatmap_unordered(
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 1394, in iflatmap_unordered
[rank0]: [async_result.get(timeout=0.05) for async_result in async_results]
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 1394, in <listcomp>
[rank0]: [async_result.get(timeout=0.05) for async_result in async_results]
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/multiprocess/pool.py", line 771, in get
[rank0]: raise self._value
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/multiprocess/pool.py", line 537, in _handle_tasks
[rank0]: put(task)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/multiprocess/connection.py", line 214, in send
[rank0]: self._send_bytes(_ForkingPickler.dumps(obj))
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/multiprocess/reduction.py", line 54, in dumps
[rank0]: cls(buf, protocol, *args, **kwds).dump(obj)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/dill/_dill.py", line 498, in dump
[rank0]: StockPickler.dump(self, obj)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 487, in dump
[rank0]: self.save(obj)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 901, in save_tuple
[rank0]: save(element)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 886, in save_tuple
[rank0]: save(element)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/dill/_dill.py", line 990, in save_module_dict
[rank0]: StockPickler.save_dict(pickler, obj)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 971, in save_dict
[rank0]: self._batch_setitems(obj.items())
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 997, in _batch_setitems
[rank0]: save(v)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/dill/_dill.py", line 1493, in save_function
[rank0]: pickler.save_reduce(_create_function, (obj.__code__,
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 692, in save_reduce
[rank0]: save(args)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 901, in save_tuple
[rank0]: save(element)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 901, in save_tuple
[rank0]: save(element)
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/pickle.py", line 560, in save
[rank0]: f(self, obj) # Call unbound method with explicit self
[rank0]: File "/data/llmpt/anaconda3/envs/lmflow240514/lib/python3.9/site-packages/dill/_dill.py", line 1226, in save_cell
[rank0]: f = obj.cell_contents
[rank0]: ValueError: Cell is empty