DarkPose
How can I test it on an arbitrary RGB image?
I've tried to write demo code, but I got stuck on how to interpret the output of the network:
```python
import argparse
import glob
import math
import os

import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms

from config import cfg
from config import update_config
from core.inference import get_final_preds
from models.pose_hrnet import get_pose_net
from utils.vis import save_debug_images


def parse_args():
    parser = argparse.ArgumentParser(description='Train keypoints network')
    # general
    parser.add_argument('--cfg',
                        help='experiment configure file name',
                        default='experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml',
                        type=str)
    parser.add_argument('opts',
                        help='Modify config options using the command-line',
                        default=None,
                        nargs=argparse.REMAINDER)
    parser.add_argument('--modelDir',
                        help='model directory',
                        type=str,
                        default='')
    parser.add_argument('--logDir',
                        help='log directory',
                        type=str,
                        default='')
    parser.add_argument('--dataDir',
                        help='data directory',
                        type=str,
                        default='./Inputs/')
    parser.add_argument('--prevModelDir',
                        help='prev Model directory',
                        type=str,
                        default='')
    args = parser.parse_args()
    return args


def save_images(img, joints_pred, name, nrow=8, padding=2):
    # Draw the predicted joints onto the image and write it to Results/.
    height = int(img.shape[0] + padding)
    width = int(img.shape[1] + padding)
    nmaps = 1
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    k = 0
    for y in range(ymaps):
        for x in range(xmaps):
            if k >= nmaps:
                break
            joints = joints_pred[k]
            for joint in joints:
                joint[0] = x * width + padding + joint[0]
                joint[1] = y * height + padding + joint[1]
                cv2.circle(img, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
            k = k + 1
    cv2.imwrite(f"Results/{name}", img)


def main():
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    args = parse_args()
    update_config(cfg, args)
    image_size = np.array(cfg.MODEL.IMAGE_SIZE)

    model = get_pose_net(cfg, is_train=False)
    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        # final_output_dir would be the training output directory from the repo's tools.
        model_state_file = os.path.join(final_output_dir, 'final_state.pth')
        model.load_state_dict(torch.load(model_state_file))
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    img_path_l = sorted(glob.glob('./Inputs' + '/*'))
    with torch.no_grad():
        for path in img_path_l:
            name = path.split('/')[-1]
            image = cv2.imread(path)
            image = cv2.resize(image, (384, 288))  # cv2.resize takes (width, height)
            input = transform(image).unsqueeze(0)
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs
            print(f"{name} : {output.shape}")


if __name__ == '__main__':
    main()
```
I don't know what to set for scale and center in `get_final_preds`.
`cv2.resize` hides the scale and center information. What should I do?
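To make the question concrete, this is the call I think I need to make. The center/scale values below are only my guess, treating the whole original image as the box and dividing by 200 to match the pixel_std convention used elsewhere in the repo:

```python
# My guess only: whole image as the box, scale in units of pixel_std = 200.
h, w = cv2.imread(path).shape[:2]          # size of the original, un-resized image
center = np.array([[w / 2.0, h / 2.0]])    # batch of one person
scale = np.array([[w / 200.0, h / 200.0]])
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), center, scale)
```

Is that the intended way to call it, or do I need the same affine crop that the training pipeline uses?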
Did you solve this problem?
I found the following code in the mmpose project; maybe it can solve this problem:

```python
def _get_multi_scale_size(image, input_size, current_scale, min_scale):
    """Get the size for multi-scale training.

    Args:
        image: Input image.
        input_size (int): Size of the image input.
        current_scale (float): Scale factor.
        min_scale (float): Minimal scale.

    Returns:
        tuple: A tuple containing multi-scale sizes.

        - (w_resized, h_resized) (tuple(int)): resized width/height
        - center (np.ndarray): image center
        - scale (np.ndarray): scales wrt width/height
    """
    h, w, _ = image.shape
    center = np.array([round(w / 2.0), round(h / 2.0)])

    # calculate the size for min_scale
    min_input_size = _ceil_to_multiples_of(min_scale * input_size, 64)
    if w < h:
        w_resized = int(min_input_size * current_scale / min_scale)
        h_resized = int(
            _ceil_to_multiples_of(min_input_size / w * h, 64) * current_scale /
            min_scale)
        scale_w = w / 200.0
        scale_h = h_resized / w_resized * w / 200.0
    else:
        h_resized = int(min_input_size * current_scale / min_scale)
        w_resized = int(
            _ceil_to_multiples_of(min_input_size / h * w, 64) * current_scale /
            min_scale)
        scale_h = h / 200.0
        scale_w = w_resized / h_resized * h / 200.0
    return (w_resized, h_resized), center, np.array([scale_w, scale_h])
```
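The `_ceil_to_multiples_of` helper isn't shown above; as far as I can tell it just rounds a value up to the nearest multiple of a base (64 here). A minimal stand-in written from that assumption, not copied from mmpose:

```python
import numpy as np

def _ceil_to_multiples_of(x, base=64):
    # Round x up to the nearest multiple of `base` (assumed behaviour of the
    # mmpose helper; not copied from their source).
    return int(np.ceil(x / base)) * base
```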
I found the following code in this project; maybe it can solve this problem:
```python
def box_to_center_scale(box, model_image_width, model_image_height):
    """Convert a box to the center/scale information required for pose transformation.

    Parameters
    ----------
    box : list of tuple
        list of length 2 with two tuples of floats representing
        bottom left and top right corner of a box
    model_image_width : int
    model_image_height : int

    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)

    bottom_left_corner = box[0]
    top_right_corner = box[1]
    box_width = top_right_corner[0] - bottom_left_corner[0]
    box_height = top_right_corner[1] - bottom_left_corner[1]
    bottom_left_x = bottom_left_corner[0]
    bottom_left_y = bottom_left_corner[1]
    center[0] = bottom_left_x + box_width * 0.5
    center[1] = bottom_left_y + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
```
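For reference, here is roughly how I would wire that into the demo above, reusing `cfg`, `model`, `transform`, and `path` from it. This is only a sketch: it treats the whole image as the person box (a detector box would normally go there) and assumes `get_affine_transform` from `utils.transforms` and `get_final_preds` from `core.inference` behave as in the HRNet/DarkPose test code:

```python
from utils.transforms import get_affine_transform

image = cv2.imread(path)
h, w = image.shape[:2]

# Whole image as the "person box"; swap in a detector box if you have one.
center, scale = box_to_center_scale(
    [(0, 0), (w, h)], cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1])

# Crop with the same affine transform the training pipeline uses, so that
# center/scale stay consistent with what get_final_preds expects.
trans = get_affine_transform(center, scale, 0, cfg.MODEL.IMAGE_SIZE)
model_input = cv2.warpAffine(
    image, trans,
    (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
    flags=cv2.INTER_LINEAR)

with torch.no_grad():
    output = model(transform(model_input).unsqueeze(0))
    # get_final_preds maps heatmap peaks back to original-image coordinates
    # using the same center/scale that defined the crop.
    preds, maxvals = get_final_preds(
        cfg, output.clone().cpu().numpy(),
        np.asarray([center]), np.asarray([scale]))
```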