oneformer3d icon indicating copy to clipboard operation
oneformer3d copied to clipboard

2 类分割时 loss 异常(NaN),可能是数据处理的问题吗?我没找到解决方法

Open sHicg1997 opened this issue 2 months ago • 1 comments

我用 `print(f"{filename}: unique={unique_vals}, min={min_val}, max={max_val}")` 打印了 .bin 文件的内容。

scannet 数据集的 .bin 内容:scene0000_00.bin: unique=[ 972734782 986435695 989416459 ... 3158633955 3161070494 3163012110], min=972734782, max=3163012110

我的:0000020376.bin: unique=[ 972404271 974886227 983969715 ... 3237937842 3237954662 3237961435], min=972404271, max=3237961435

我的 loss 为:

10/16 06:25:18 - mmengine - INFO - Epoch(train) [4][400/801] lr: 9.9471e-05 eta: 8:25:08 time: 0.0730 data_time: 0.0222 memory: 2300 loss: nan inst_loss: 0.0078 seg_loss: nan grad_norm: 0.0781

10/16 06:25:22 - mmengine - INFO - Epoch(train) [4][450/801] lr: 9.9471e-05 eta: 8:24:29 time: 0.0695 data_time: 0.0207 memory: 2318 loss: nan inst_loss: 0.0072 seg_loss: nan grad_norm: 0.0725

数据结果输出:
sem_mask = torch.tensor(input_dict['pts_semantic_mask'], dtype=torch.long)
sem_mask = torch.nn.functional.one_hot(sem_mask, num_classes=self.num_classes + 1)
pts_sem = np.array(input_dict['pts_semantic_mask'])
print(np.unique(pts_sem), np.min(pts_sem), np.max(pts_sem))

输出: [2] 2 2 [2] 2 2 [2] 2 2 [2] 2 2 [2] 2 2 [2] 2 2

这是我 pkl 文件内容之一: {'lidar_points': {'num_pts_feats': 6, 'lidar_path': '000965432942594.bin'}, 'instances': [{'bbox_3d': [-1.2202199697494507, -3.8518354892730713, -0.24315516650676727, 0.0, 0.0, 0.0], 'bbox_label_3d': 0}, {'bbox_3d': [-0.22716784477233887, 0.30283641815185547, 0.09186387062072754, 8.935768127441406, 11.749524116516113, 4.48917818069458], 'bbox_label_3d': 0}], 'pts_semantic_mask_path': '000965432942594.bin', 'pts_instance_mask_path': '000965432942594.bin', 'super_pts_path': '000965432942594.bin', 'axis_align_matrix': [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]}]}

base = [ 'mmdet3d::base/default_runtime.py', 'mmdet3d::base/datasets/scannet-seg.py' ] custom_imports = dict(imports=['oneformer3d'])

# model settings

# Shared sizes referenced throughout the rest of this config.
# `num_channels` is passed to the model as its `num_channels` argument.
num_channels = 2
# Number of instance classes and semantic classes (two-class setup).
# `num_semantic_classes` is also reused as the ignore index further down.
num_instance_classes = 2
num_semantic_classes = 2

# Model definition: a deliberately tiny ScanNetOneFormer3D variant.
# All numeric values are kept exactly as posted; only the layout is restored
# so that the inline comments no longer swallow the rest of the statement.
model = dict(
    type='ScanNetOneFormer3D',
    data_preprocessor=dict(type='Det3DDataPreprocessor_'),
    in_channels=6,
    num_channels=num_channels,
    voxel_size=0.05,  # coarse voxels to reduce GPU memory
    num_classes=num_instance_classes,
    min_spatial_shape=128,
    query_thr=0.5,
    backbone=dict(
        type='SpConvUNet',
        num_planes=[2, 3, 4, 5],  # smaller backbone
        return_blocks=True),
    decoder=dict(
        type='ScanNetQueryDecoder',
        num_layers=2,
        num_instance_queries=0,
        num_semantic_queries=0,
        num_instance_classes=num_instance_classes,
        num_semantic_classes=num_semantic_classes,
        num_semantic_linears=1,
        in_channels=2,
        d_model=16,
        num_heads=1,
        hidden_dim=64,
        dropout=0.0,
        activation_fn='gelu',
        iter_pred=True,
        attn_mask=False,
        fix_attention=True,
        objectness_flag=False),
    criterion=dict(
        type='ScanNetUnifiedCriterion',
        num_semantic_classes=num_semantic_classes,
        sem_criterion=dict(
            type='ScanNetSemanticCriterion',
            # The ignore label is the first index past the valid classes.
            ignore_index=num_semantic_classes,
            loss_weight=0.2),
        inst_criterion=dict(
            type='InstanceCriterion',
            matcher=dict(
                type='SparseMatcher',
                costs=[
                    dict(type='QueryClassificationCost', weight=0.5),
                    dict(type='MaskBCECost', weight=1.0),
                    dict(type='MaskDiceCost', weight=1.0)],
                topk=1),
            loss_weight=[0.5, 1.0, 1.0, 0.5],
            num_classes=num_instance_classes,
            non_object_weight=0.1,
            fix_dice_loss_weight=True,
            iter_matcher=True,
            fix_mean_loss=True)),
    train_cfg=dict(),
    test_cfg=dict(
        topk_insts=60,
        inst_score_thr=0.0,
        pan_score_thr=0.5,
        npoint_thr=10,
        obj_normalization=True,
        sp_score_thr=0.4,
        nms=True,
        matrix_nms_kernel='linear',
        # NOTE(review): both classes are listed as stuff here while
        # num_instance_classes is also 2 -- confirm this is intended.
        stuff_classes=[0, 1]))

# dataset settings

# Dataset class name used by the train/val dataloaders in this config.
dataset_type = 'ScanNetSegDataset_'
# Sub-directory name for each kind of per-scene file.
data_prefix = dict(
    pts='points',
    pts_instance_mask='instance_mask',
    pts_semantic_mask='semantic_mask',
    sp_pts_mask='super_points')

# Training pipeline: load points and annotations, map labels, subsample,
# augment, build superpoint annotations, then pack the listed keys.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D_',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=True,
        with_seg_3d=True,
        with_sp_mask_3d=True),
    dict(type='PointSegClassMapping'),
    dict(type='PointSample', num_points=8192),  # fewer sampled points to lower GPU memory
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-3.14, 3.14],
        scale_ratio_range=[0.8, 1.2],
        translation_std=[0.1, 0.1, 0.1],
        shift_height=False),
    dict(
        type='NormalizePointsColor_',
        color_mean=[127.5, 127.5, 127.5]),
    dict(
        type='AddSuperPointAnnotations',
        num_classes=num_semantic_classes,
        stuff_classes=[0, 1],
        merge_non_stuff_cls=False),
    # 'ElasticTransfrom' is kept exactly as posted; it is a registry key,
    # so the spelling must match the registered transform name.
    dict(
        type='ElasticTransfrom',
        gran=[6, 20],
        mag=[40, 160],
        voxel_size=0.02,
        p=0.5),
    dict(
        type='Pack3DDetInputs_',
        keys=[
            'points', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask', 'sp_pts_mask', 'gt_sp_masks',
            'elastic_coords'
        ])
]

# Test pipeline: same loading and label mapping as training, no random
# augmentation; only points and superpoint masks are packed.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D_',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=True,
        with_seg_3d=True,
        with_sp_mask_3d=True),
    dict(type='PointSegClassMapping'),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='NormalizePointsColor_',
                color_mean=[127.5, 127.5, 127.5]),
            dict(
                type='AddSuperPointAnnotations',
                num_classes=num_semantic_classes,
                stuff_classes=[0, 1],
                merge_non_stuff_cls=False),
        ]),
    dict(type='Pack3DDetInputs_', keys=['points', 'sp_pts_mask'])
]

# run settings

# Training dataloader: one scene per batch, one worker.
train_dataloader = dict(
    batch_size=1,
    num_workers=1,
    dataset=dict(
        type=dataset_type,
        ann_file='scannet200_oneformer3d_infos_train.pkl',
        data_prefix=data_prefix,
        pipeline=train_pipeline,
        ignore_index=num_semantic_classes,
        scene_idxs=None,
        test_mode=False))
# Validation dataloader; only the dataset is overridden here, so any other
# loader options presumably come from the inherited base config.
val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        ann_file='scannet200_oneformer3d_infos_val.pkl',
        data_prefix=data_prefix,
        pipeline=test_pipeline,
        ignore_index=num_semantic_classes,
        test_mode=True))
# Testing reuses the validation dataloader unchanged.
test_dataloader = val_dataloader

# Human-readable names of the two classes, indexed by label id.
class_names = ['teeth', 'crown']
# Maps label id -> class name (0 -> 'teeth', 1 -> 'crown').
label2cat = {i: name for i, name in enumerate(class_names)}
# Metadata handed to the evaluator below as `metric_meta`.
metric_meta = dict(
    label2cat=label2cat,
    ignore_index=[num_semantic_classes],
    classes=class_names,
    dataset_name='ScanNet')

sem_mapping = [0, 1]
# sem_mapping has only two entries, so this slice is an empty list.
inst_mapping = sem_mapping[2:]
val_evaluator = dict(
    type='UnifiedSegMetric',
    stuff_class_inds=[0, 1],
    # With num_semantic_classes = 2 (as set in this config) this range is
    # empty, i.e. no thing classes are evaluated.
    thing_class_inds=list(range(2, num_semantic_classes)),
    min_num_points=1,
    id_offset=2**16,
    sem_mapping=sem_mapping,
    inst_mapping=inst_mapping,
    metric_meta=metric_meta)
# Testing reuses the validation evaluator unchanged.
test_evaluator = val_evaluator

# Optimizer wrapper: AdamW under mixed precision with dynamic loss scaling
# and gradient-norm clipping.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.05),
    clip_grad=dict(max_norm=10, norm_type=2),
    loss_scale="dynamic",
    accumulative_counts=2  # gradient accumulation, equivalent to batch_size=2
)

# Polynomial learning-rate schedule; `end` matches max_epochs (512) in train_cfg.
param_scheduler = dict(type='PolyLR', begin=0, end=512, power=0.9)
# EmptyCacheHook configured with after_iter=True.
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# Checkpoint hook: interval of 1, at most 16 checkpoints kept.
default_hooks = dict(
    checkpoint=dict(interval=1, max_keep_ckpts=16))

# Checkpoint path assigned to `load_from`; presumably the pretrained weights
# that training is initialized from -- verify the file exists at this path.
load_from = '/data/detect/oneformer3d/checkpoints/sstnet_scannet2.pth'

# Epoch-based training for 512 epochs.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=512,
    # (512 - 16, 1) evaluates to (496, 1). Presumably each pair is
    # (start epoch, validation interval): every 16 epochs at first, then
    # every epoch for the last 16 -- confirm against the MMEngine docs.
    dynamic_intervals=[(1, 16), (512 - 16, 1)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

sHicg1997 avatar Oct 16 '25 06:10 sHicg1997

Looks like you are training with a very low batch size. In this case, if none of the scenes in a batch contain ground-truth objects, the loss may become NaN. I think debugging the scenes from the first NaN batch may help.

filaPro avatar Oct 16 '25 15:10 filaPro