webcam_demo.py
I used my trained model to run the webcam demo. When no one was detected, everything worked normally, but as soon as a person was detected, the following error occurred.
I tried to print the texts:
Hi, thanks for using MMPose. It seems like a problem with the format of the dataset metainfo. Could you please provide the configs (webcam & pose estimator)? They will help us find out where the problem lies.
Yeah, this is my config:

```python
_base_ = ['../../../_base_/default_runtime.py']

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# runtime
train_cfg = dict(max_epochs=300, val_interval=50)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-3,
))

# resume = True
# load_from = None
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=300,
        milestones=[170, 260],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='MyMobileViT',
        model_cfg={
            'layer1': {
                'out_channels': 32, 'expand_ratio': 4, 'num_blocks': 1,
                'stride': 1, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer2': {
                'out_channels': 64, 'expand_ratio': 4, 'num_blocks': 3,
                'stride': 2, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer3': {
                'out_channels': 96, 'transformer_channels': 144,
                'ffn_dim': 288, 'transformer_blocks': 2, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer4': {
                'out_channels': 128, 'transformer_channels': 192,
                'ffn_dim': 384, 'transformer_blocks': 4, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer5': {
                'out_channels': 160, 'transformer_channels': 240,
                'ffn_dim': 480, 'transformer_blocks': 3, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'last_layer_exp_factor': 4,
            'cls_dropout': 0.1
        }),
    head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=640,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
        output_heatmaps=True))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = '/data/zgchen/ViTPose/tools/data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale', padding=1.5),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        bbox_file='/data/zgchen/ViTPose/tools/data/coco/'
        'person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
```
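For reference, this config depends on a custom backbone (`type='MyMobileViT'`), so the class has to be registered before the config can be built. Below is a minimal sketch of what that registration typically looks like, assuming MMPose 1.x / MMEngine registry conventions; the class body is a hypothetical placeholder, not the actual `MyMobileViT` implementation:

```python
# Hypothetical sketch: registering a custom backbone so that
# `type='MyMobileViT'` in the config can be resolved by the model builder.
import torch.nn as nn

from mmpose.registry import MODELS


@MODELS.register_module()
class MyMobileViT(nn.Module):
    """Placeholder backbone; building the layers from `model_cfg`
    ('layer1' ... 'layer5') is assumed and not reproduced here."""

    def __init__(self, model_cfg: dict):
        super().__init__()
        self.model_cfg = model_cfg

    def forward(self, x):
        # Must return feature maps whose channel number matches the head's
        # `in_channels` (640 in the config above).
        raise NotImplementedError('sketch only')
```

The defining module also needs to be imported before the model is built (e.g. via a `custom_imports` entry in the config); otherwise the registry lookup for `'MyMobileViT'` fails.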
and this is the cam_cfg:
```python
# Copyright (c) OpenMMLab. All rights reserved.
executor_cfg = dict(
    # Basic configurations of the executor
    name='Pose Estimation',
    camera_id=0,
    # Define nodes.
    # The configuration of a node usually includes:
    #   1. 'type': Node class name
    #   2. 'name': Node name
    #   3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
    #      input and output buffer names. This may depend on the node class.
    #   4. 'enable_key': assign a hot-key to toggle enable/disable this node.
    #      This may depend on the node class.
    #   5. Other class-specific arguments
    nodes=[
        # 'DetectorNode':
        # This node performs object detection from the frame image using an
        # MMDetection model.
        dict(
            type='DetectorNode',
            name='detector',
            model_config='D://pythonProject//mmpose//demo//mmdetection_cfg//'
            'ssdlite_mobilenetv2-scratch_8xb24-600e_coco.py',
            model_checkpoint='https://download.openmmlab.com'
            '/mmdetection/v2.0/ssd/'
            'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
            'scratch_600e_coco_20210629_110627-974d9307.pth',
            input_buffer='_input_',  # '_input_' is an executor-reserved buffer
            output_buffer='det_result'),
        # 'TopDownPoseEstimatorNode':
        # This node performs keypoint detection from the frame image using an
        # MMPose top-down model. Detection results are needed.
        dict(
            type='TopDownPoseEstimatorNode',
            name='human pose estimator',
            model_config='D:/pythonProject/mmpose/configs/body_2d_keypoint/'
            'topdown_heatmap/coco/mobilevit_coco-256x192.py',
            model_checkpoint='D:/pythonProject/mmpose/work_dirs/'
            'AP_epoch_300.pth',
            labels=['person'],
            input_buffer='det_result',
            output_buffer='human_pose'),
        dict(
            type='TopDownPoseEstimatorNode',
            name='animal pose estimator',
            model_config='configs/animal_2d_keypoint/topdown_heatmap/'
            'animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py',
            model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
            'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
            labels=['cat', 'dog', 'horse', 'sheep', 'cow'],
            input_buffer='human_pose',
            output_buffer='animal_pose'),
        # 'ObjectAssignerNode':
        # This node binds the latest model inference result with the current
        # frame. (This means the frame image and inference result may be
        # asynchronous.)
        dict(
            type='ObjectAssignerNode',
            name='object assigner',
            frame_buffer='_frame_',  # '_frame_' is an executor-reserved buffer
            object_buffer='animal_pose',
            output_buffer='frame'),
        # 'ObjectVisualizerNode':
        # This node draws the pose visualization result on the frame image.
        # Pose results are needed.
        dict(
            type='ObjectVisualizerNode',
            name='object visualizer',
            enable_key='v',
            enable=True,
            show_bbox=True,
            must_have_keypoint=False,
            show_keypoint=True,
            input_buffer='frame',
            output_buffer='vis'),
        # 'SunglassesEffectNode':
        # This node draws the sunglasses effect on the frame image.
        # Pose results are needed.
        dict(
            type='SunglassesEffectNode',
            name='sunglasses',
            enable_key='s',
            enable=False,
            input_buffer='vis',
            output_buffer='vis_sunglasses'),
        # 'BigeyeEffectNode':
        # This node draws the big-eye effect on the frame image.
        # Pose results are needed.
        dict(
            type='BigeyeEffectNode',
            name='big-eye',
            enable_key='b',
            enable=False,
            input_buffer='vis_sunglasses',
            output_buffer='vis_bigeye'),
        # 'NoticeBoardNode':
        # This node shows a notice board with given content, e.g. help
        # information.
        dict(
            type='NoticeBoardNode',
            name='instruction',
            enable_key='h',
            enable=True,
            input_buffer='vis_bigeye',
            output_buffer='vis_notice',
            content_lines=[
                'This is a demo for pose visualization and simple image '
                'effects. Have fun!', '', 'Hot-keys:',
                '"v": Pose estimation result visualization',
                '"s": Sunglasses effect B-)', '"b": Big-eye effect 0_0',
                '"h": Show help information',
                '"m": Show diagnostic information', '"q": Exit'
            ],
        ),
        # 'MonitorNode':
        # This node shows diagnostic information in the frame image. It can
        # be used for debugging or monitoring system resource status.
        dict(
            type='MonitorNode',
            name='monitor',
            enable_key='m',
            enable=False,
            input_buffer='vis_notice',
            output_buffer='display'),
        # 'RecorderNode':
        # This node saves the output video into a file.
        dict(
            type='RecorderNode',
            name='recorder',
            out_video_file='webcam_demo.mp4',
            input_buffer='display',
            output_buffer='_display_'
            # '_display_' is an executor-reserved buffer
        )
    ])
```
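As a side note, the buffers chain these nodes into one pipeline: `_input_` → `det_result` → `human_pose` → `animal_pose`, which the assigner binds to `_frame_` and emits as `frame`, followed by `frame` → `vis` → `vis_sunglasses` → `vis_bigeye` → `vis_notice` → `display` → `_display_`; each node's `output_buffer` must match the next node's `input_buffer`. Running such a config boils down to something like the sketch below (this mirrors what `demo/webcam_demo.py` does in MMPose 1.x; the exact import path and the config file location are assumptions and may differ across versions):

```python
# Minimal launcher sketch, assuming the MMPose 1.x webcam API.
from mmengine.config import Config

from mmpose.apis.webcam import WebcamExecutor

cfg = Config.fromfile('demo/webcam_cfg/pose_estimation.py')  # the config above
executor = WebcamExecutor(**cfg.executor_cfg)
executor.run()  # opens camera_id=0 and runs the node pipeline
```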
This bug is caused by the different formats of dataset metainfo used in MMPose and MMDetection. Thank you very much for pointing it out. You can modify your code as in https://github.com/open-mmlab/mmpose/pull/1813 to fix this bug.
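For readers who hit the same error: the failure mode is that the webcam nodes read dataset metainfo (e.g. the class names that `labels=['person']` is matched against) in one format, while the model they query stores it in another. The snippet below is only a hedged illustration of that kind of normalization, not the actual patch; see the PR above for the real fix:

```python
# Hedged illustration of normalizing dataset metainfo across MMDetection
# and MMPose models. This is NOT the actual change from the PR above; the
# key names below are assumptions.
def get_class_names(model):
    """Fetch class names regardless of which convention the model uses."""
    meta = getattr(model, 'dataset_meta', None) or {}
    for key in ('classes', 'CLASSES'):  # newer vs. older conventions
        if key in meta:
            return list(meta[key])
    # Some models keep class names as a plain attribute instead.
    return list(getattr(model, 'CLASSES', []))
```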