Error running PoseC3D
Hello,
I have cloned the repo and installed the requirements. When running the command below, I encountered some errors that seem to come from the data loader component.
bash tools/dist_train.sh configs/posec3d/slowonly_r50_ntu60_xsub/joint.py 2 --validate --test-last --test-best
Here are the error messages.
File "/home/arptracker/Vincent/action_recognition/pyskl/tools/train.py", line 164, in <module>
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
main()
File "/home/arptracker/Vincent/action_recognition/pyskl/tools/train.py", line 156, in main
train_model(model, datasets, cfg, validate=args.validate, test=test_option, timestamp=timestamp, meta=meta)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/apis/train.py", line 144, in train_model
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/mmcv/runner/epoch_based_runner.py", line 47, in train
for i, data_batch in enumerate(self.data_loader):
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 634, in __next__
data = self._next_data()
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1346, in _next_data
return self._process_data(data)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1372, in _process_data
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
data.reraise()
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/_utils.py", line 644, in reraise
raise exception
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/dataset_wrappers.py", line 34, in __getitem__
return self.dataset[idx % self._ori_len]
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/base.py", line 322, in __getitem__
return self.prepare_test_frames(idx) if self.test_mode else self.prepare_train_frames(idx)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/base.py", line 273, in prepare_train_frames
return self.pipeline(results)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/pipelines/compose.py", line 41, in __call__
data = t(data)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/pipelines/augmentations.py", line 61, in __call__
img_shape = results['img_shape']
KeyError: 'img_shape'
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
Traceback (most recent call last):
File "/home/arptracker/Vincent/action_recognition/pyskl/tools/train.py", line 164, in <module>
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
main()
File "/home/arptracker/Vincent/action_recognition/pyskl/tools/train.py", line 156, in main
train_model(model, datasets, cfg, validate=args.validate, test=test_option, timestamp=timestamp, meta=meta)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/apis/train.py", line 144, in train_model
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/mmcv/runner/epoch_based_runner.py", line 47, in train
for i, data_batch in enumerate(self.data_loader):
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 634, in __next__
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
data = self._next_data()
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1346, in _next_data
return self._process_data(data)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1372, in _process_data
data.reraise()
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/_utils.py", line 644, in reraise
raise exception
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/dataset_wrappers.py", line 34, in __getitem__
return self.dataset[idx % self._ori_len]
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/base.py", line 322, in __getitem__
return self.prepare_test_frames(idx) if self.test_mode else self.prepare_train_frames(idx)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/base.py", line 273, in prepare_train_frames
return self.pipeline(results)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/pipelines/compose.py", line 41, in __call__
data = t(data)
File "/home/arptracker/Vincent/action_recognition/pyskl/pyskl/datasets/pipelines/augmentations.py", line 61, in __call__
img_shape = results['img_shape']
KeyError: 'img_shape'
dict_keys(['frame_dir', 'label', 'keypoint', 'total_frames', 'modality', 'start_index', 'test_mode', 'frame_inds', 'clip_len', 'frame_interval', 'num_clips'])
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 23312) of binary: /home/arptracker/Vincent/action_recognition/venv/bin/python
Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/launch.py", line 196, in <module>
main()
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/launch.py", line 192, in main
launch(args)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/launch.py", line 177, in launch
run(args)
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/run.py", line 785, in run
elastic_launch(
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 134, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/arptracker/Vincent/action_recognition/venv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 250, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
Could you kindly advise which version of the code works for replicating your results? Note that I downloaded your processed dataset and placed it in the right directory.
Hi, Gbouna,
I just tried the latest version of the main branch on my computer, and it works fine. It looks like the error occurs because the img_shape field doesn't exist in your annotation file (but it should be there). To fix this, double-check your annotation file and make sure you are using the files downloaded from this page: https://github.com/kennymckormick/pyskl/tree/main/tools/data
Thanks for the prompt response, I will check this and update you.
After checking, the data I used was actually from here: https://github.com/kennymckormick/pyskl/tree/main/tools/data. The problem occurs when I use the 3D skeleton; when I tried the 2D skeleton instead, training started normally. Could you check why the 3D skeleton gives this error? @kennymckormick