3dhand
3dhand copied to clipboard
Entangled 3D joints in webcam quick test
Hi @boukhayma ,
First of all, thank you for your work and for releasing the code. I adapted it to run on a live webcam feed.
In the example above, while OpenPose seems to perform pretty decently, the 3D hand estimation tends to entangle some joints, with the thumb joints in particular looking quite rigid. Have you ever observed this outcome before?
I'm also attaching the test code, since I might be doing something wrong here. BTW, I'm especially curious why the joints are arranged in triplets with no apparent relation to the fingers.
Best
from __future__ import division
import torch
from torch.autograd import Variable
from model import resnet34_Mano
# from torch.utils import data
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import time
import cv2
import os
import PyOpenPose as OP
# -------------------------------------------------------------------------------
# Capture
# Open capture device 0 and grab one frame up front so the frame geometry is
# known before the main loop starts.
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if not ret or frame is None:
    # cap.read() reports failure through its return value instead of raising;
    # fail here with a clear message rather than on `frame.shape` below.
    cap.release()
    raise RuntimeError("Failed to grab an initial frame from capture device 0")
imgSize = list(frame.shape)  # copy of frame.shape as a list
outSize = imgSize[1::-1]     # first two entries of imgSize, in reverse order
target_size = 240            # side length (pixels) of the square crop used below
# -------------------------------------------------------------------------------
# OpenPose
# Root of the OpenPose installation; must be set in the environment.
OPENPOSE_ROOT = os.environ["OPENPOSE_ROOT"]

# Heat-map channel indices grouped three at a time. Each triplet becomes one
# 3-channel image in the main loop (7 triplets x 3 = 21 channels).
hand_indexes = [
    [0, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [17, 18, 19],
    [20, 13, 14],
    [15, 16, 1],
    [2, 3, 4],
]

# OpenPose works on the same square resolution as the cropped webcam frame.
input_size = (target_size,) * 2
output_size = (target_size,) * 2

download_heatmaps = True
with_face = False
with_hands = True

# Hand bounding box covering the whole cropped frame.
handBB = [0, 0] + [target_size] * 2

# Candidate network resolutions; only the large one is passed to OpenPose.
netres_large = (1312, 736)
netres_medium = (656, 368)

# "<OPENPOSE_ROOT>/models/" with a trailing separator.
openpose_models_dir = os.sep.join([OPENPOSE_ROOT, "models", ""])

op = OP.OpenPose(
    netres_large,
    input_size,
    output_size,
    "COCO",
    openpose_models_dir,
    0,
    download_heatmaps,
    OP.OpenPose.ScaleMode.ZeroToOne,
    with_face,
    with_hands,
)
# -------------------------------------------------------------------------------
# Colour table for the 21 predicted joints: entry i is the colour used to draw
# joint i. The wrist is black; every finger keeps one base colour whose shade
# lightens from joint 1 (base) to joint 4.
_shades = (0, 60, 120, 180)

joint_colors = [[0, 0, 0]]                             # wrist
joint_colors += [[s, 255, 255] for s in _shades]       # index 1 (base) .. 4
joint_colors += [[s, 255, s] for s in _shades]         # "heart" 1 (base) .. 4 (presumably the middle finger)
joint_colors += [[255, s, 255] for s in _shades]       # little 1 (base) .. 4
joint_colors += [[255, s, s] for s in _shades]         # ring 1 (base) .. 4
joint_colors += [[s, s, 255] for s in _shades]         # thumb 1 (base) .. 4
# -------------------------------------------------------------------------------
# Network input selection:
#   0 -> the image is the only input
#   1 -> the image plus the joint heat maps are the input
input_option = 1
# template = open('data/template.obj')
# content = template.readlines()
# template.close()
# -------------------------------------------------------------------------------
# Model
# Build the network for the chosen input option, wrap it in DataParallel,
# load the matching weights file and switch to evaluation mode.
hand_net = resnet34_Mano(input_option=input_option)
model = torch.nn.DataParallel(hand_net)
weights_path = 'data/model-%s.pth' % input_option
model.load_state_dict(torch.load(weights_path))
model.eval()
# -------------------------------------------------------------------------------
# Main Loop
# Per frame: grab -> centre-crop and resize -> OpenPose hand heat maps ->
# 3D hand network -> show [OpenPose render | heat maps | predicted 2D joints].
# Keys: ESC quits; '0' shows every heat-map triplet, '1'..'7' a single one.
finger = 0                   # selected heat-map triplet (0 = all)
actual_fps = 0
paused = False
delay = {True: 0, False: 1}  # cv2.waitKey timeout, looked up by `paused`
_MIN_ELAPSED = 1e-6          # floor for elapsed times so the FPS division cannot hit zero

try:
    while True:
        start_time = time.time()

        # Capture
        # cap.read() signals failure through `ret`; it does not raise.
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Failed to grab")
            break
        frame = cv2.flip(frame, 1)

        # Formatting: centre-crop to a square (for either orientation), then
        # scale to the network resolution. The interpolation flag must be
        # passed by keyword: the third positional argument of cv2.resize is
        # `dst`, and PIL's Image.BILINEAR is not an OpenCV flag.
        h, w, _ = frame.shape
        side = min(h, w)
        top = (h - side) // 2
        left = (w - side) // 2
        bgr = frame[top:top + side, left:left + side]
        bgr = cv2.resize(bgr, (target_size, target_size),
                         interpolation=cv2.INTER_LINEAR)

        # 2D joint detection
        # NOTE(review): handBB fills the first four of the eight box values,
        # while the heat maps read below come from `right_hands` - confirm
        # which slot PyOpenPose treats as the right hand.
        t = time.time()
        op.detectHands(bgr, np.array(handBB + [0, 0, 0, 0], dtype=np.int32).reshape((1, 8)))
        op_fps = 1.0 / max(time.time() - t, _MIN_ELAPSED)
        res = op.render(bgr)
        cv2.putText(res, 'OpenPose Fps = %0.1f' % op_fps, (20, 20), 0, 0.5, (255, 255, 255))

        heatmap = np.zeros(bgr.shape)  # float accumulator for the displayed heat maps
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        inputs = [ToTensor()(rgb)]
        if download_heatmaps:
            _, right_hands = op.getHandHeatmaps()
            hands = right_hands
            for j, triplet in enumerate(hand_indexes):
                # Heat maps are read with their last axis reversed; that
                # mirrored version is what gets displayed.
                hm0 = hands[0, triplet[0], :, ::-1]
                hm1 = hands[0, triplet[1], :, ::-1]
                hm2 = hands[0, triplet[2], :, ::-1]
                if finger == 0 or j + 1 == finger:
                    heatmap[:, :, 0] += hm0 * 255
                    heatmap[:, :, 1] += hm1 * 255
                    heatmap[:, :, 2] += hm2 * 255
                # For the network the maps are mirrored back (np.fliplr) and
                # packed as one 3-channel uint8 image per triplet.
                channels = [
                    np.expand_dims(np.fliplr(hm * 255).astype(np.uint8), axis=2)
                    for hm in (hm0, hm1, hm2)
                ]
                handmap = np.concatenate(channels, axis=2)
                # handmap = cv2.resize(handmap, output_size, Image.BILINEAR) # Same as input image
                inputs.append(ToTensor()(handmap))
        inputs = torch.cat(inputs, dim=0)

        # 3D hand detection
        out1, out2 = model(Variable(inputs.cuda()).unsqueeze(0))
        # print(out1.shape)
        # print(out2.shape)

        # Display 2D joints
        # out1[0] interleaves the coordinates: entry 2*i is the horizontal and
        # entry 2*i+1 the vertical position of joint i. Copy them to the host
        # once instead of reading element by element.
        canvas = np.full_like(bgr, 255)
        coords = out1.data[0].cpu().numpy()
        for i, color in enumerate(joint_colors):
            center = (int(coords[2 * i]), int(coords[2 * i + 1]))
            cv2.circle(canvas, center, radius=2, color=tuple(color), thickness=2)

        actual_fps = 1.0 / max(time.time() - start_time, _MIN_ELAPSED)
        cv2.putText(canvas, 'Total Fps = %0.1f' % actual_fps, (20, 20), 0, 0.5, (0, 0, 0))

        # Several heat maps are summed per channel; clip before the uint8 cast
        # so overlapping responses saturate instead of wrapping around.
        heatmap_view = np.clip(heatmap, 0, 255).astype(np.uint8)
        cv2.putText(heatmap_view, 'Finger = %d' % finger, (20, 20), 0, 0.5, (255, 255, 255))

        composite = np.concatenate((res, heatmap_view, canvas), axis=1)
        cv2.imshow('3D Hand Pose', composite)

        # Save 3D mesh
        # file1 = open('data/out/'+str(i)+'.obj','w')
        # for j in xrange(778):
        #     file1.write("v %f %f %f\n"%(out2[0,21+j,0],-out2[0,21+j,1],-out2[0,21+j,2]))
        # for j,x in enumerate(content):
        #     a = x[:len(x)-1].split(" ")
        #     if (a[0] == 'f'):
        #         file1.write(x)
        # file1.close()

        # Mask once so the ESC test and the digit test see the same key code.
        key = cv2.waitKey(delay[paused]) & 0xFF
        if key == 27:  # esc to exit
            break
        if ord('0') <= key <= ord('7'):
            finger = key - ord('0')
finally:
    # Release the camera and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()