File "/content/drive/MyDrive/bart/train.py", line 230, in
trainer.fit(model, dm)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 460, in fit
self._run(model)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run
self.dispatch()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch
self.accelerator.start_training(self)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training
self.training_type_plugin.start_training(trainer)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training
self._results = trainer.run_stage()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage
return self.run_train()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/training_loop.py", line 491, in run_training_epoch
for batch_idx, (batch, is_last_batch) in train_dataloader:
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/profiler/profilers.py", line 112, in profile_iterable
value = next(iterator)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 534, in prefetch_iterator
for val in it:
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 464, in next
return self.request_next_batch(self.loader_iters)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 478, in request_next_batch
return apply_to_collection(loader_iters, Iterator, next)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/utilities/apply_func.py", line 85, in apply_to_collection
return function(data, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 521, in next
data = self._next_data()
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/usr/local/lib/python3.7/dist-packages/torch/_utils.py", line 434, in reraise
raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/drive/MyDrive/bart/dataset.py", line 46, in getitem
label_ids = self.tok.encode(instance['summary'])
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py", line 2235, in encode
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py", line 2574, in encode_plus
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_fast.py", line 516, in _encode_plus
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_fast.py", line 429, in _batch_encode_plus
is_pretokenized=is_split_into_words,
TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]
To use not only the newspaper articles provided by AI Hub but also the editorials, magazine articles, and court rulings for training, I merged them all, normalized them to the same format, saved them as a TSV file, and used that, but this error keeps occurring.
I'm wondering whether the problem is with my data or with the code.
I'm currently running this on Colab Pro+.
@jamani135 From what you've posted, the error is raised while encoding the summary text. Please check the summary input side of your data.
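For reference, this particular tokenizers error ("TextEncodeInput must be Union[...]") usually means `encode()` received something other than a string, for example NaN (a float) coming from an empty TSV cell. Below is a minimal sketch for checking that before training; the file name `train.tsv` and the column name `summary` are assumptions, so adjust them to your data.

```python
import pandas as pd

# A minimal check, assuming the training data is a TSV with a 'summary' column
# (file name and column name are assumptions, not taken from the repo).
df = pd.read_csv("train.tsv", sep="\t")

# Rows whose summary is missing or only whitespace would reach the tokenizer
# as NaN/empty and trigger the TypeError seen in the traceback above.
bad = df[df["summary"].isna() | (df["summary"].astype(str).str.strip() == "")]
print(f"rows with empty/missing summary: {len(bad)}")

# One option: drop those rows and train on a cleaned file.
df.drop(bad.index).to_csv("train_clean.tsv", sep="\t", index=False)
```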