
webcam_demo.py

ChenZhenGui opened this issue 2 years ago · 4 comments

I used my trained model to run the webcam demo. When no one was detected, everything worked normally, but as soon as a person was detected I got the following error: [error screenshot]. I also tried printing the texts: [screenshot].

ChenZhenGui, Nov 17 '22 07:11

Hi, thanks for using MMPose. This looks like a problem with the format of the dataset metainfo. Could you please share the configs (webcam & pose estimator)? They will help us find out where the problem lies.
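For reference, a quick way to inspect that metainfo on your side is shown below (a minimal sketch, assuming an MMPose 1.x top-down model; the config/checkpoint paths are placeholders):

```python
# Minimal sketch: print the dataset metainfo attached to a pose model.
# Paths are placeholders for your own config and checkpoint.
from mmpose.apis import init_model

model = init_model('path/to/pose_config.py', 'path/to/checkpoint.pth')
# dataset_meta is a plain dict; its keys and value formats are what the
# webcam nodes consume downstream.
print(model.dataset_meta.keys())
```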

Ben-Louis, Nov 17 '22 09:11

Yeah, this is my config:

```python
_base_ = ['../../../_base_/default_runtime.py']

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# runtime
train_cfg = dict(max_epochs=300, val_interval=50)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-3,
))

# resume = True
# load_from = None

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=300,
        milestones=[170, 260],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='MyMobileViT',
        model_cfg={
            'layer1': {
                'out_channels': 32, 'expand_ratio': 4, 'num_blocks': 1,
                'stride': 1, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer2': {
                'out_channels': 64, 'expand_ratio': 4, 'num_blocks': 3,
                'stride': 2, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer3': {
                'out_channels': 96, 'transformer_channels': 144,
                'ffn_dim': 288, 'transformer_blocks': 2, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer4': {
                'out_channels': 128, 'transformer_channels': 192,
                'ffn_dim': 384, 'transformer_blocks': 4, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer5': {
                'out_channels': 160, 'transformer_channels': 240,
                'ffn_dim': 480, 'transformer_blocks': 3, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'last_layer_exp_factor': 4,
            'cls_dropout': 0.1
        },
    ),
    head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=640,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
        output_heatmaps=True
    ))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = '/data/zgchen/ViTPose/tools/data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale', padding=1.5),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        bbox_file='/data/zgchen/ViTPose/tools/data/coco/'
        'person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
```
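As a quick sanity check, the config above parses with MMEngine (a minimal sketch; the path is a placeholder for wherever this file is saved):

```python
# Minimal sketch: confirm the training config loads, including the
# `_base_` inheritance. The path below is a placeholder.
from mmengine.config import Config

cfg = Config.fromfile('configs/body_2d_keypoint/topdown_heatmap/coco/'
                      'mobilevit_coco-256x192.py')
print(cfg.model.backbone.type)  # -> 'MyMobileViT'
```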

ChenZhenGui, Nov 17 '22 09:11

And this is the cam_cfg:

```python
# Copyright (c) OpenMMLab. All rights reserved.
executor_cfg = dict(
    # Basic configurations of the executor
    name='Pose Estimation',
    camera_id=0,
    # Define nodes.
    # The configuration of a node usually includes:
    #   1. 'type': Node class name
    #   2. 'name': Node name
    #   3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
    #      input and output buffer names. This may depend on the node class.
    #   4. 'enable_key': assign a hot-key to toggle enable/disable this node.
    #      This may depend on the node class.
    #   5. Other class-specific arguments
    nodes=[
        # 'DetectorNode':
        # This node performs object detection from the frame image using an
        # MMDetection model.
        dict(
            type='DetectorNode',
            name='detector',
            model_config='D://pythonProject//mmpose//demo//mmdetection_cfg//'
            'ssdlite_mobilenetv2-scratch_8xb24-600e_coco.py',
            model_checkpoint='https://download.openmmlab.com'
            '/mmdetection/v2.0/ssd/'
            'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
            'scratch_600e_coco_20210629_110627-974d9307.pth',
            input_buffer='_input_',  # '_input_' is an executor-reserved buffer
            output_buffer='det_result'),
        # 'TopDownPoseEstimatorNode':
        # This node performs keypoint detection from the frame image using an
        # MMPose top-down model. Detection results are needed.
        dict(
            type='TopDownPoseEstimatorNode',
            name='human pose estimator',
            model_config='D:/pythonProject/mmpose/configs/body_2d_keypoint/'
            'topdown_heatmap/coco/mobilevit_coco-256x192.py',
            model_checkpoint='D:/pythonProject/mmpose/work_dirs/'
            'AP_epoch_300.pth',
            labels=['person'],
            input_buffer='det_result',
            output_buffer='human_pose'),
        dict(
            type='TopDownPoseEstimatorNode',
            name='animal pose estimator',
            model_config='configs/animal_2d_keypoint/topdown_heatmap/'
            'animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py',
            model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
            'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
            labels=['cat', 'dog', 'horse', 'sheep', 'cow'],
            input_buffer='human_pose',
            output_buffer='animal_pose'),
        # 'ObjectAssignerNode':
        # This node binds the latest model inference result with the current
        # frame. (This means the frame image and inference result may be
        # asynchronous.)
        dict(
            type='ObjectAssignerNode',
            name='object assigner',
            frame_buffer='_frame_',  # '_frame_' is an executor-reserved buffer
            object_buffer='animal_pose',
            output_buffer='frame'),
        # 'ObjectVisualizerNode':
        # This node draws the pose visualization result in the frame image.
        # Pose results are needed.
        dict(
            type='ObjectVisualizerNode',
            name='object visualizer',
            enable_key='v',
            enable=True,
            show_bbox=True,
            must_have_keypoint=False,
            show_keypoint=True,
            input_buffer='frame',
            output_buffer='vis'),
        # 'SunglassesEffectNode':
        # This node draws the sunglasses effect in the frame image.
        # Pose results are needed.
        dict(
            type='SunglassesEffectNode',
            name='sunglasses',
            enable_key='s',
            enable=False,
            input_buffer='vis',
            output_buffer='vis_sunglasses'),
        # 'BigeyeEffectNode':
        # This node draws the big-eye effect in the frame image.
        # Pose results are needed.
        dict(
            type='BigeyeEffectNode',
            name='big-eye',
            enable_key='b',
            enable=False,
            input_buffer='vis_sunglasses',
            output_buffer='vis_bigeye'),
        # 'NoticeBoardNode':
        # This node shows a notice board with given content, e.g. help
        # information.
        dict(
            type='NoticeBoardNode',
            name='instruction',
            enable_key='h',
            enable=True,
            input_buffer='vis_bigeye',
            output_buffer='vis_notice',
            content_lines=[
                'This is a demo for pose visualization and simple image '
                'effects. Have fun!', '', 'Hot-keys:',
                '"v": Pose estimation result visualization',
                '"s": Sunglasses effect B-)', '"b": Big-eye effect 0_0',
                '"h": Show help information',
                '"m": Show diagnostic information', '"q": Exit'
            ],
        ),
        # 'MonitorNode':
        # This node shows diagnostic information in the frame image. It can
        # be used for debugging or monitoring system resource status.
        dict(
            type='MonitorNode',
            name='monitor',
            enable_key='m',
            enable=False,
            input_buffer='vis_notice',
            output_buffer='display'),
        # 'RecorderNode':
        # This node saves the output video into a file.
        dict(
            type='RecorderNode',
            name='recorder',
            out_video_file='webcam_demo.mp4',
            input_buffer='display',
            output_buffer='_display_'
            # '_display_' is an executor-reserved buffer
        )
    ])
```
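For context, `webcam_demo.py` consumes this file roughly as follows (a minimal sketch paraphrasing the demo script in MMPose 1.x, not a verbatim copy; the config path is a placeholder):

```python
# Minimal sketch of how the demo builds and runs the executor from the
# cam_cfg above (paraphrased; the config path is a placeholder).
from mmengine.config import Config
from mmpose.apis.webcam import WebcamExecutor

cfg = Config.fromfile('demo/webcam_cfg/pose_estimation.py')
executor = WebcamExecutor(**cfg.executor_cfg)
executor.run()
```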

ChenZhenGui, Nov 17 '22 09:11

This bug is caused by the differing formats of the dataset metainfo in MMPose and MMDetection. Thank you very much for pointing it out. You can modify your code as in https://github.com/open-mmlab/mmpose/pull/1813 to fix it.
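In short, the two libraries can expose dataset metainfo under different key conventions, so code that reads it must tolerate both. A hypothetical sketch of that kind of defensive lookup (not the actual diff in the PR):

```python
# Hypothetical sketch (not the actual PR diff): MMDetection and MMPose
# may store class names under different metainfo keys, e.g. lowercase
# 'classes' vs uppercase 'CLASSES'. A tolerant lookup avoids a KeyError:
def get_class_names(dataset_meta):
    for key in ('classes', 'CLASSES'):
        if key in dataset_meta:
            return list(dataset_meta[key])
    raise KeyError('no class names found in dataset metainfo')
```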

Ben-Louis, Nov 17 '22 10:11