
How can I test it on an arbitrary RGB image?

Open aligoglos opened this issue 4 years ago • 5 comments

I've tried to write demo code, but I got stuck on how to interpret the output of the network:

import argparse
import os
import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from config import cfg
from config import update_config
from core.inference import get_final_preds
from utils.vis import save_debug_images
import glob
from models.pose_hrnet import get_pose_net

def parse_args():
	parser = argparse.ArgumentParser(description='Train keypoints network')
	# general
	parser.add_argument('--cfg',
						help='experiment configure file name',
						default='experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml',
						type=str)

	parser.add_argument('opts',
						help="Modify config options using the command-line",
						default=None,
						nargs=argparse.REMAINDER)

	parser.add_argument('--modelDir',
						help='model directory',
						type=str,
						default='')
	parser.add_argument('--logDir',
						help='log directory',
						type=str,
						default='')
	parser.add_argument('--dataDir',
						help='data directory',
						type=str,
						default='./Inputs/')
	parser.add_argument('--prevModelDir',
						help='prev Model directory',
						type=str,
						default='')

	args = parser.parse_args()
	return args

def save_images(img, joints_pred, name):
	# img: BGR image as loaded by cv2; joints_pred: (num_joints, 2) keypoints in image coordinates
	for joint in joints_pred:
		cv2.circle(img, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
	cv2.imwrite(f"Results/{name}", img)

def main():
	normalize = transforms.Normalize(
			mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
		)
	transform = transforms.Compose([
		transforms.ToTensor(),
		normalize,
	])
	args = parse_args()
	update_config(cfg, args)
	image_size = np.array(cfg.MODEL.IMAGE_SIZE)

	model = get_pose_net(
		cfg, is_train=False
	)

	if cfg.TEST.MODEL_FILE:
		model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
	else:
		# NOTE: final_output_dir is not defined in this script; point it at your training output directory
		model_state_file = os.path.join(
			final_output_dir, 'final_state.pth'
		)
		model.load_state_dict(torch.load(model_state_file))

	model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
	
	img_path_l = sorted(glob.glob('./Inputs' + '/*'))
	with torch.no_grad():
		for path in img_path_l:
			name  = path.split('/')[-1]
			image = cv2.imread(path)  # cv2 loads BGR; the COCO configs usually expect RGB (COLOR_RGB: true)
			# cv2.resize takes (width, height); the w48_384x288 config expects a 288x384 (w x h) input
			image = cv2.resize(image, (288, 384))
			input = transform(image).unsqueeze(0).cuda()
			#print(input.shape)
			outputs = model(input)
			if isinstance(outputs, list):
				output = outputs[-1]
			else:
				output = outputs
			print(f"{name} : {output.shape}")
	

if __name__ == '__main__':
	main()

I don't know what to set for scale and center in get_final_preds.

aligoglos avatar Aug 08 '20 11:08 aligoglos


cv2.resize hides the scale and center.
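
If you are running on the whole image rather than a cropped person box, one rough workaround is to treat the entire image as the box. This is only my own sketch (full_image_center_scale is a hypothetical helper name), using the pixel_std = 200 convention that HRNet-style transforms assume; input_w/input_h are the model input width and height:

import numpy as np

def full_image_center_scale(img, input_w=288, input_h=384, pixel_std=200.0):
    # Treat the whole image as the person box: the center is simply the image center
    h, w = img.shape[:2]
    center = np.array([w / 2.0, h / 2.0], dtype=np.float32)
    # Pad the shorter side so the box matches the model input aspect ratio,
    # then express its size in units of pixel_std (HRNet convention)
    aspect_ratio = input_w / input_h
    box_w, box_h = float(w), float(h)
    if box_w > aspect_ratio * box_h:
        box_h = box_w / aspect_ratio
    else:
        box_w = box_h * aspect_ratio
    scale = np.array([box_w / pixel_std, box_h / pixel_std], dtype=np.float32)
    return center, scale

These are the center and scale you would then pass to get_final_preds (one pair per image in the batch).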

hbin-ac avatar Aug 17 '20 06:08 hbin-ac

What should I do?

aligoglos avatar Aug 17 '20 14:08 aligoglos

Did you solve this problem?

q5390498 avatar Sep 05 '20 06:09 q5390498

I found the following code in the mmpose project; maybe it can solve this problem:

def _get_multi_scale_size(image, input_size, current_scale, min_scale):
    """Get the size for multi-scale training
    Args:
        image: Input image.
        input_size (int): Size of the image input.
        current_scale (float): Scale factor.
        min_scale (float): Minimal scale.

    Returns:
        tuple: A tuple containing multi-scale sizes.

        - (w_resized, h_resized) (tuple(int)): resized width/height
        - center (np.ndarray): image center
        - scale (np.ndarray): scales wrt width/height
    """
    h, w, _ = image.shape

    center = np.array([round(w / 2.0), round(h / 2.0)])

    # calculate the size for min_scale
    min_input_size = _ceil_to_multiples_of(min_scale * input_size, 64)
    if w < h:
        w_resized = int(min_input_size * current_scale / min_scale)
        h_resized = int(
            _ceil_to_multiples_of(min_input_size / w * h, 64) * current_scale /
            min_scale)
        scale_w = w / 200.0
        scale_h = h_resized / w_resized * w / 200.0
    else:
        h_resized = int(min_input_size * current_scale / min_scale)
        w_resized = int(
            _ceil_to_multiples_of(min_input_size / h * w, 64) * current_scale /
            min_scale)
        scale_h = h / 200.0
        scale_w = w_resized / h_resized * h / 200.0

    return (w_resized, h_resized), center, np.array([scale_w, scale_h])
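
The snippet calls _ceil_to_multiples_of, which isn't shown above. In mmpose it is a small helper that rounds a length up to the next multiple of the given base, roughly:

import numpy as np

def _ceil_to_multiples_of(x, base=64):
    # Round x up to the nearest integer multiple of base (e.g. 150 -> 192 for base=64)
    return int(np.ceil(x / base)) * base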

Kuekua avatar Mar 11 '21 03:03 Kuekua

I found the following code in this project; maybe it can solve this problem:

import numpy as np

def box_to_center_scale(box, model_image_width, model_image_height):
    """Convert a box to the center/scale information required for the pose transformation.

    Parameters
    ----------
    box : list of tuple
        list of length 2 with two tuples of floats representing
        bottom left and top right corner of a box
    model_image_width : int
    model_image_height : int

    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)
    bottom_left_corner = box[0]
    top_right_corner = box[1]
    box_width = top_right_corner[0] - bottom_left_corner[0]
    box_height = top_right_corner[1] - bottom_left_corner[1]
    bottom_left_x = bottom_left_corner[0]
    bottom_left_y = bottom_left_corner[1]
    center[0] = bottom_left_x + box_width * 0.5
    center[1] = bottom_left_y + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    # Pad the box to the model input aspect ratio, then express it in units of pixel_std
    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    # Enlarge the box slightly (the same 1.25 padding used when converting COCO boxes)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
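
To tie this back to the original question, here is a rough end-to-end sketch of how these pieces fit together. It reuses cfg, transform, model, and image from the first post, and it assumes utils.transforms provides get_affine_transform and that get_final_preds keeps the same interface as in the original HRNet code, so treat it as a starting point rather than tested code:

import cv2
import numpy as np
import torch

from core.inference import get_final_preds
from utils.transforms import get_affine_transform

h, w = image.shape[:2]
# Use the whole image as the "box"; with a person detector you would pass its box instead
center, scale = box_to_center_scale([(0, 0), (w, h)], 288, 384)

# Warp the crop to the model input size instead of a bare cv2.resize,
# so center/scale describe exactly how the input was produced
trans = get_affine_transform(center, scale, 0, np.array([288, 384]))
model_input = cv2.warpAffine(image, trans, (288, 384), flags=cv2.INTER_LINEAR)

input_tensor = transform(model_input).unsqueeze(0).cuda()
with torch.no_grad():
    heatmaps = model(input_tensor)

# get_final_preds maps heatmap coordinates back to the original image using center/scale
preds, maxvals = get_final_preds(
    cfg, heatmaps.cpu().numpy(), np.asarray([center]), np.asarray([scale]))
print(preds[0])  # keypoints in the original image's coordinate system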

mlantern avatar Jul 08 '22 03:07 mlantern