(deformable_detr) root@1d4f89a0883e:/data/tmp/MOTR# sh configs/r50_motr_train.sh
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
| distributed init (rank 1): env://
| distributed init (rank 0): env://
git:
sha: 8690da3392159635ca37c31975126acf40220724, status: has uncommited changes, branch: main
Namespace(accurate_ratio=False, aux_loss=True, backbone='resnet50', batch_size=1, bbox_loss_coef=5, cache_mode=False, cj=False, clip_max_norm=0.1, cls_loss_coef=2, coco_panoptic_path=None, coco_path='/data/workspace/detectron2/datasets/coco/', crop=False, data_txt_path_train='./datasets/data_path/crowdhuman.train', data_txt_path_val='./datasets/data_path/crowdhuman.val', dataset_file='e2e_joint', dec_layers=6, dec_n_points=4, decoder_cross_self=False, device='cuda', dice_loss_coef=1, dilation=False, dim_feedforward=1024, dist_backend='nccl', dist_url='env://', distributed=True, dropout=0.0, enable_fpn=False, enc_layers=6, enc_n_points=4, epochs=200, eval=False, exp_name='submit', extra_track_attn=True, filter_ignore=False, focal_alpha=0.25, fp_ratio=0.3, frozen_weights=None, giou_loss_coef=2, gpu=0, gt_file_train=None, gt_file_val=None, hidden_dim=256, img_path='data/valid/JPEGImages/', input_video='figs/demo.mp4', loss_normalizer=False, lr=0.0002, lr_backbone=2e-05, lr_backbone_names=['backbone.0'], lr_drop=100, lr_drop_epochs=None, lr_linear_proj_mult=0.1, lr_linear_proj_names=['reference_points', 'sampling_offsets'], mask_loss_coef=1, masks=False, max_size=1333, memory_bank_len=4, memory_bank_score_thresh=0.0, memory_bank_type=None, memory_bank_with_self_attn=False, merger_dropout=0.0, meta_arch='motr', mix_match=False, mot_path='/data/tmp/MOTR/data/', nheads=8, num_anchors=1, num_feature_levels=4, num_queries=300, num_workers=2, output_dir='exps/e2e_motr_r50_joint', position_embedding='sine', position_embedding_scale=6.283185307179586, pretrained=None, query_interaction_layer='QIM', random_drop=0.1, rank=0, remove_difficult=False, resume='', sample_interval=10, sample_mode='random_interval', sampler_lengths=[2, 3, 4, 5], sampler_steps=[50, 90, 150], save_period=50, seed=42, set_cost_bbox=5, set_cost_class=2, set_cost_giou=2, sgd=False, sigmoid_attn=False, start_epoch=0, two_stage=False, update_query_pos=True, use_checkpoint=False, val_width=800, vis=False, 
weight_decay=0.0001, with_box_refine=True, world_size=2)
Training with Extra Self Attention in Every Decoder.
Training with Self-Cross Attention.
number of params: 43912992
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/val
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
Start training
set epoch: epoch 0 period_idx=0
set epoch: epoch 0 period_idx=0
Traceback (most recent call last):
File "main.py", line 386, in <module>
main(args)
File "main.py", line 332, in main
train_stats = train_func(
File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
for data_dict in metric_logger.log_every(data_loader, print_freq, header):
File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
for obj in iterable:
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
return self._process_data(data)
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
data.reraise()
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
data = fetcher.fetch(index)
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
img_i, targets_i = self._pre_single_frame(i)
File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/283554,ff900035056259.txt
Traceback (most recent call last):
File "main.py", line 386, in <module>
main(args)
File "main.py", line 332, in main
train_stats = train_func(
File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
for data_dict in metric_logger.log_every(data_loader, print_freq, header):
File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
for obj in iterable:
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
return self._process_data(data)
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
data.reraise()
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
data = fetcher.fetch(index)
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
img_i, targets_i = self._pre_single_frame(i)
File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/273275,d79e3000d97c2336.txt
Killing subprocess 63352
Killing subprocess 63353
Traceback (most recent call last):
File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 340, in <module>
main()
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 326, in main
sigkill_handler(signal.SIGTERM, None) # not coming back
File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 301, in sigkill_handler
raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/opt/conda/envs/deformable_detr/bin/python3', '-u', 'main.py', '--meta_arch', 'motr', '--epoch', '200', '--with_box_refine', '--lr_drop', '100', '--lr', '2e-4', '--lr_backbone', '2e-5', '--output_dir', 'exps/e2e_motr_r50_joint', '--batch_size', '1', '--sample_mode', 'random_interval', '--sample_interval', '10', '--sampler_steps', '50', '90', '150', '--sampler_lengths', '2', '3', '4', '5', '--update_query_pos', '--merger_dropout', '0', '--dropout', '0', '--random_drop', '0.1', '--fp_ratio', '0.3', '--query_interaction_layer', 'QIM', '--extra_track_attn', '--data_txt_path_train', './datasets/data_path/crowdhuman.train', '--data_txt_path_val', './datasets/data_path/crowdhuman.val', '--mot_path', '/data/tmp/MOTR/data/', '--dataset_file', 'e2e_joint']' returned non-zero exit status 1.