Suitable module in point_rend
Describe the question you meet
Sorry, I can't find a suitable module in PointRend for the CWD algorithm. Which PointRend module should I fill in for `student_module`?
Post related information
Your config file if you modified it or created a new one.
# Base configs this file inherits from: dataset, schedule and runtime settings.
_base_ = [
    '../../_base_/datasets/mmdet/coco_instance.py',
    '../../_base_/schedules/mmdet/schedule_1x.py',
    '../../_base_/mmdet_runtime.py',
]
# model settings
# Student network: PointRend on a ResNet-18 + FPN backbone, 4 object classes.
student = dict(
    type='mmdet.PointRend',
    backbone=dict(
        type='ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        # The backbone is initialised from ImageNet weights (init_cfg below).
        # Without pretrained weights, frozen BN layers never leave their
        # initial statistics, which is a common source of diverging (NaN)
        # losses. BN affine parameters are left trainable to match the
        # torchvision-style checkpoint.
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='torchvision://resnet18')),
    neck=dict(
        type='FPN',
        # Channel widths of the four ResNet-18 stages selected by out_indices.
        in_channels=[64, 128, 256, 512],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='PointRendRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=4,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='GenericRoIExtractor',
            aggregation='concat',
            roi_layer=dict(type='SimpleRoIAlign', output_size=14),
            out_channels=256,
            featmap_strides=[4]),
        mask_head=dict(
            type='CoarseMaskHead',
            num_fcs=2,
            in_channels=256,
            conv_out_channels=256,
            fc_out_channels=1024,
            num_classes=4,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
        point_head=dict(
            type='MaskPointHead',
            num_fcs=3,
            in_channels=256,
            fc_channels=256,
            num_classes=4,
            coarse_pred_each_layer=True,
            loss_point=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=7,
            num_points=14 * 14,
            oversample_ratio=3,
            importance_sample_ratio=0.75,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,  # number of top-scoring proposals kept before NMS
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5,
            subdivision_steps=5,
            subdivision_num_points=28 * 28,
            scale_factor=2)))
# Pretrained teacher weights (PointRend, ResNet-50 caffe backbone, per the
# file name).
checkpoint = '/media/jidong/code/xuhao/mmdetection/load/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth'
# Teacher network: PointRend whose architecture must match `checkpoint`.
# NOTE(review): the checkpoint name suggests COCO training, while every head
# below uses num_classes=4 -- confirm the class-specific layers can actually
# be loaded, or use a teacher fine-tuned on the 4-class dataset.
teacher = dict(
    type='mmdet.PointRend',
    init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
    backbone=dict(
        type='ResNet',
        # The checkpoint is an R50 model, so the backbone depth must be 50;
        # a deeper backbone would leave its extra blocks without weights.
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='PointRendRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=4,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='GenericRoIExtractor',
            aggregation='concat',
            roi_layer=dict(type='SimpleRoIAlign', output_size=14),
            out_channels=256,
            featmap_strides=[4]),
        mask_head=dict(
            type='CoarseMaskHead',
            num_fcs=2,
            in_channels=256,
            conv_out_channels=256,
            fc_out_channels=1024,
            num_classes=4,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
        point_head=dict(
            type='MaskPointHead',
            num_fcs=3,
            in_channels=256,
            fc_channels=256,
            num_classes=4,
            coarse_pred_each_layer=True,
            loss_point=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=7,
            num_points=14 * 14,
            oversample_ratio=3,
            importance_sample_ratio=0.75,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,  # number of top-scoring proposals kept before NMS
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            # Alternative:
            # nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05),
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5,
            subdivision_steps=5,
            subdivision_num_points=28 * 28,
            scale_factor=2)))
# Distillation setup: the student is trained with its own task losses plus
# the distillation losses listed in `components`; the teacher contributes no
# loss of its own and is not trainable.
algorithm = dict(
    type='GeneralDistill',
    architecture=dict(
        type='MMDetArchitecture',
        model=student,
    ),
    with_student_loss=True,
    with_teacher_loss=False,
    distiller=dict(
        type='SingleTeacherDistiller',
        teacher=teacher,
        teacher_trainable=False,
        teacher_norm_eval=True,
        components=[
            # One entry per (student module, teacher module) pair to distill.
            # NOTE(review): this pairs the RoI bbox heads with a WSLD loss --
            # confirm WSLD can consume this module's outputs; the question
            # above asks about CWD instead.
            dict(
                student_module='roi_head.bbox_head',
                teacher_module='roi_head.bbox_head',
                losses=[
                    dict(
                        type='WSLD',
                        name='loss_wsld',
                        tau=2,
                        loss_weight=2.5,
                        num_classes=4),
                ]),
        ]),
)
find_unused_parameters = True
Sorry, we don't know the point rend model very well and can't give very accurate recommendations.
At present, there is no very general KD algorithm that can be applied to various models.
Later, I got PointRend running with CWD, but from the very beginning of training all the losses were NaN. What is the reason?
2022-06-27 15:25:49,260 - mmdet - INFO - workflow: [('train', 1)], max: 50 epochs 2022-06-27 15:25:49,260 - mmdet - INFO - Checkpoints will be saved to /media/jidong/code/xuhao/mmrazor-master/test by HardDiskBackend. [W TensorIterator.cpp:918] Warning: Mixed memory format inputs detected while calling the operator. The operator will output contiguous tensor even if some of the inputs are in channels_last format. (function operator()) 2022-06-27 15:26:10,490 - mmdet - INFO - Epoch [1][50/3000] lr: 1.978e-03, eta: 17:37:51, time: 0.423, data_time: 0.047, memory: 1774, student.loss_rpn_cls: nan, student.loss_rpn_bbox: nan, student.loss_cls: nan, student.acc: 95.1568, student.loss_bbox: nan, student.loss_mask: nan, student.loss_point: nan, distiller.loss_cwd_logits.0: nan, distiller.loss_cwd_logits.1: nan, distiller.loss_cwd_logits.2: nan, distiller.loss_cwd_logits.3: nan, distiller.loss_cwd_logits.4: nan, loss: nan 2022-06-27 15:26:26,640 - mmdet - INFO - Epoch [1][100/3000] lr: 3.976e-03, eta: 15:32:22, time: 0.323, data_time: 0.005, memory: 1774, student.loss_rpn_cls: nan, student.loss_rpn_bbox: nan, student.loss_cls: nan, student.acc: 61.3786, student.loss_bbox: nan, student.loss_mask: nan, student.loss_point: nan, distiller.loss_cwd_logits.0: nan, distiller.loss_cwd_logits.1: nan, distiller.loss_cwd_logits.2: nan, distiller.loss_cwd_logits.3: nan, distiller.loss_cwd_logits.4: nan, loss: nan 2022-06-27 15:26:42,881 - mmdet - INFO - Epoch [1][150/3000] lr: 5.974e-03, eta: 14:51:50, time: 0.325, data_time: 0.005, memory: 1774, student.loss_rpn_cls: nan, student.loss_rpn_bbox: nan, student.loss_cls: nan, student.acc: 55.9667, student.loss_bbox: nan, student.loss_mask: nan, student.loss_point: nan, distiller.loss_cwd_logits.0: nan, distiller.loss_cwd_logits.1: nan, distiller.loss_cwd_logits.2: nan, distiller.loss_cwd_logits.3: nan, distiller.loss_cwd_logits.4: nan, loss: nan 2022-06-27 15:26:59,470 - mmdet - INFO - Epoch [1][200/3000] lr: 7.972e-03, eta: 14:35:22, 
time: 0.331, data_time: 0.005, memory: 1774, student.loss_rpn_cls: nan, student.loss_rpn_bbox: nan, student.loss_cls: nan, student.acc: 56.9762, student.loss_bbox: nan, student.loss_mask: nan, student.loss_point: nan, distiller.loss_cwd_logits.0: nan, distiller.loss_cwd_logits.1: nan, distiller.loss_cwd_logits.2: nan, distiller.loss_cwd_logits.3: nan, distiller.loss_cwd_logits.4: nan, loss: nan
Sorry, we don't know the point rend model very well and can't give very accurate recommendations. At present, there is no very general KD algorithm that can be applied to various models.
Excuse me, is there a solution to this problem?