
Why are there no *.txt label files? Errors when training the model

tanglong-hub opened this issue 2 years ago · 1 comment

(deformable_detr) root@1d4f89a0883e:/data/tmp/MOTR# sh configs/r50_motr_train.sh


Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.


| distributed init (rank 1): env://
| distributed init (rank 0): env://
git:
  sha: 8690da3392159635ca37c31975126acf40220724, status: has uncommited changes, branch: main

Namespace(accurate_ratio=False, aux_loss=True, backbone='resnet50', batch_size=1, bbox_loss_coef=5, cache_mode=False, cj=False, clip_max_norm=0.1, cls_loss_coef=2, coco_panoptic_path=None, coco_path='/data/workspace/detectron2/datasets/coco/', crop=False, data_txt_path_train='./datasets/data_path/crowdhuman.train', data_txt_path_val='./datasets/data_path/crowdhuman.val', dataset_file='e2e_joint', dec_layers=6, dec_n_points=4, decoder_cross_self=False, device='cuda', dice_loss_coef=1, dilation=False, dim_feedforward=1024, dist_backend='nccl', dist_url='env://', distributed=True, dropout=0.0, enable_fpn=False, enc_layers=6, enc_n_points=4, epochs=200, eval=False, exp_name='submit', extra_track_attn=True, filter_ignore=False, focal_alpha=0.25, fp_ratio=0.3, frozen_weights=None, giou_loss_coef=2, gpu=0, gt_file_train=None, gt_file_val=None, hidden_dim=256, img_path='data/valid/JPEGImages/', input_video='figs/demo.mp4', loss_normalizer=False, lr=0.0002, lr_backbone=2e-05, lr_backbone_names=['backbone.0'], lr_drop=100, lr_drop_epochs=None, lr_linear_proj_mult=0.1, lr_linear_proj_names=['reference_points', 'sampling_offsets'], mask_loss_coef=1, masks=False, max_size=1333, memory_bank_len=4, memory_bank_score_thresh=0.0, memory_bank_type=None, memory_bank_with_self_attn=False, merger_dropout=0.0, meta_arch='motr', mix_match=False, mot_path='/data/tmp/MOTR/data/', nheads=8, num_anchors=1, num_feature_levels=4, num_queries=300, num_workers=2, output_dir='exps/e2e_motr_r50_joint', position_embedding='sine', position_embedding_scale=6.283185307179586, pretrained=None, query_interaction_layer='QIM', random_drop=0.1, rank=0, remove_difficult=False, resume='', sample_interval=10, sample_mode='random_interval', sampler_lengths=[2, 3, 4, 5], sampler_steps=[50, 90, 150], save_period=50, seed=42, set_cost_bbox=5, set_cost_class=2, set_cost_giou=2, sgd=False, sigmoid_attn=False, start_epoch=0, two_stage=False, update_query_pos=True, use_checkpoint=False, val_width=800, vis=False, weight_decay=0.0001, with_box_refine=True, world_size=2)

Training with Extra Self Attention in Every Decoder.
Training with Self-Cross Attention.
number of params: 43912992
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/val
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
Start training
set epoch: epoch 0 period_idx=0
set epoch: epoch 0 period_idx=0

Traceback (most recent call last):
  File "main.py", line 386, in <module>
    main(args)
  File "main.py", line 332, in main
    train_stats = train_func(
  File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
    for data_dict in metric_logger.log_every(data_loader, print_freq, header):
  File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
    for obj in iterable:
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
    data = self._next_data()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
    return self._process_data(data)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
    data.reraise()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
    raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.

Original Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
    images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
  File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
    img_i, targets_i = self._pre_single_frame(i)
  File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
    raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/283554,ff900035056259.txt

Traceback (most recent call last):
  File "main.py", line 386, in <module>
    main(args)
  File "main.py", line 332, in main
    train_stats = train_func(
  File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
    for data_dict in metric_logger.log_every(data_loader, print_freq, header):
  File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
    for obj in iterable:
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
    data = self._next_data()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
    return self._process_data(data)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
    data.reraise()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
    raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.

Original Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
    images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
  File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
    img_i, targets_i = self._pre_single_frame(i)
  File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
    raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/273275,d79e3000d97c2336.txt

Killing subprocess 63352
Killing subprocess 63353
Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 340, in <module>
    main()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 326, in main
    sigkill_handler(signal.SIGTERM, None)  # not coming back
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 301, in sigkill_handler
    raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/opt/conda/envs/deformable_detr/bin/python3', '-u', 'main.py', '--meta_arch', 'motr', '--epoch', '200', '--with_box_refine', '--lr_drop', '100', '--lr', '2e-4', '--lr_backbone', '2e-5', '--output_dir', 'exps/e2e_motr_r50_joint', '--batch_size', '1', '--sample_mode', 'random_interval', '--sample_interval', '10', '--sampler_steps', '50', '90', '150', '--sampler_lengths', '2', '3', '4', '5', '--update_query_pos', '--merger_dropout', '0', '--dropout', '0', '--random_drop', '0.1', '--fp_ratio', '0.3', '--query_interaction_layer', 'QIM', '--extra_track_attn', '--data_txt_path_train', './datasets/data_path/crowdhuman.train', '--data_txt_path_val', './datasets/data_path/crowdhuman.val', '--mot_path', '/data/tmp/MOTR/data/', '--dataset_file', 'e2e_joint']' returned non-zero exit status 1.
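For what it's worth, a quick way to confirm whether the label files are missing across the whole set (rather than just the two paths in the tracebacks) is a small check script. This is a hypothetical helper, not part of MOTR; it assumes the default layout under mot_path and that the label path is derived by swapping images -> labels_with_ids and .jpg -> .txt, as the error above suggests:

import glob
import os

# Assumed CrowdHuman image directory; adjust to your own layout.
img_dir = '/data/tmp/MOTR/data/crowdhuman/images/train'

missing = []
for img_path in glob.glob(os.path.join(img_dir, '*.jpg')):
    # Mirror the assumed image-to-label path mapping.
    label_path = (img_path
                  .replace('images', 'labels_with_ids')
                  .replace('.jpg', '.txt'))
    if not os.path.isfile(label_path):
        missing.append(label_path)

print('missing label files:', len(missing))
for p in missing[:5]:
    print(p)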

tanglong-hub · Jul 19 '22

Have you run the data preparation step that generates the .txt label files under labels_with_ids? You could follow the CrowdHuman data preparation from FairMOT; a sketch is below.
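Roughly, FairMOT-style label generation for CrowdHuman looks like the following. This is a sketch, not MOTR code: the odgt field names ('ID', 'gtboxes', 'tag', 'fbox') come from the CrowdHuman annotation format, the paths are assumptions about your layout, and each output line follows the FairMOT convention "class id x_center y_center w h" normalized to [0, 1]:

import json
import os
from PIL import Image

data_root = '/data/tmp/MOTR/data/crowdhuman'          # assumed layout, adjust as needed
odgt_path = os.path.join(data_root, 'annotation_train.odgt')
img_dir = os.path.join(data_root, 'images', 'train')
label_dir = os.path.join(data_root, 'labels_with_ids', 'train')
os.makedirs(label_dir, exist_ok=True)

tid = 0  # running identity counter across the whole training set
with open(odgt_path) as f:
    for line in f:
        ann = json.loads(line)
        img_path = os.path.join(img_dir, ann['ID'] + '.jpg')
        if not os.path.isfile(img_path):
            continue
        img_w, img_h = Image.open(img_path).size
        rows = []
        for box in ann['gtboxes']:
            if box['tag'] != 'person':   # skip 'mask' (ignore) regions
                continue
            tid += 1
            x, y, w, h = box['fbox']     # full-body box in pixels: x, y, w, h
            cx, cy = (x + w / 2) / img_w, (y + h / 2) / img_h
            rows.append('0 {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
                tid, cx, cy, w / img_w, h / img_h))
        # One label file per image, named after the image ID.
        with open(os.path.join(label_dir, ann['ID'] + '.txt'), 'w') as out:
            out.writelines(rows)

After this runs, labels_with_ids/train should contain one .txt per image (e.g. 283554,ff900035056259.txt), which is what joint.py expects when it builds label_path.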

zyayoung · Jul 20 '22