Use 4D Human For Data Prep
Hi all,
I tried to use 4D Humans to estimate the poses and do the data prep; the code snippet I used/wrote is below. I visualized the SMPL results and they seem fine. I even tried running 4D Humans on ZJU-MoCap, using only the SMPL outputs from 4D Humans and keeping everything else from ZJU-MoCap as is. However, humannerf gives me "rgb" not found errors, seemingly because the rays are unable to intersect the bounding box.
I also tried ROMP, which gives similar errors, with partial validation outputs as attached. I wonder whether any of you have run into something like this before and how you fixed it.
Any discussion is appreciated.
import json
import os

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask
import torch
from tqdm import tqdm


def estimate_extrinsics_cv2(translation):
    # Identity rotation; only the predicted translation is placed in the 4x4 extrinsics.
    tra_pred = translation
    cam_extrinsics = np.eye(4)
    cam_extrinsics[:3, 3] = tra_pred
    return cam_extrinsics.astype(float)
def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert rotation matrix to 4d quaternion vector.
    This algorithm is based on the algorithm described in
    https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201

    :param rotation_matrix (N, 3, 3)
    """
    *dims, m, n = rotation_matrix.shape
    rmat_t = torch.transpose(rotation_matrix.reshape(-1, m, n), -1, -2)

    mask_d2 = rmat_t[:, 2, 2] < eps
    mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
    mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]

    t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q0 = torch.stack(
        [
            rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
            t0,
            rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
            rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
        ],
        -1,
    )
    t0_rep = t0.repeat(4, 1).t()

    t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q1 = torch.stack(
        [
            rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
            rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
            t1,
            rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
        ],
        -1,
    )
    t1_rep = t1.repeat(4, 1).t()

    t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q2 = torch.stack(
        [
            rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
            rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
            rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
            t2,
        ],
        -1,
    )
    t2_rep = t2.repeat(4, 1).t()

    t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q3 = torch.stack(
        [
            t3,
            rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
            rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
            rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
        ],
        -1,
    )
    t3_rep = t3.repeat(4, 1).t()

    mask_c0 = mask_d2 * mask_d0_d1
    mask_c1 = mask_d2 * ~mask_d0_d1
    mask_c2 = ~mask_d2 * mask_d0_nd1
    mask_c3 = ~mask_d2 * ~mask_d0_nd1
    mask_c0 = mask_c0.view(-1, 1).type_as(q0)
    mask_c1 = mask_c1.view(-1, 1).type_as(q1)
    mask_c2 = mask_c2.view(-1, 1).type_as(q2)
    mask_c3 = mask_c3.view(-1, 1).type_as(q3)

    q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
    q /= torch.sqrt(
        t0_rep * mask_c0
        + t1_rep * mask_c1
        + t2_rep * mask_c2  # noqa
        + t3_rep * mask_c3
    )  # noqa
    q *= 0.5
    return q.reshape(*dims, 4)
def quaternion_to_angle_axis(quaternion):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert quaternion vector to angle axis of rotation.
    Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h

    :param quaternion (*, 4) expects WXYZ
    :returns angle_axis (*, 3)
    """
    # unpack input and compute conversion
    q1 = quaternion[..., 1]
    q2 = quaternion[..., 2]
    q3 = quaternion[..., 3]
    sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3

    sin_theta = torch.sqrt(sin_squared_theta)
    cos_theta = quaternion[..., 0]
    two_theta = 2.0 * torch.where(
        cos_theta < 0.0,
        torch.atan2(-sin_theta, -cos_theta),
        torch.atan2(sin_theta, cos_theta),
    )

    k_pos = two_theta / sin_theta
    k_neg = 2.0 * torch.ones_like(sin_theta)
    k = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)

    angle_axis = torch.zeros_like(quaternion)[..., :3]
    angle_axis[..., 0] += q1 * k
    angle_axis[..., 1] += q2 * k
    angle_axis[..., 2] += q3 * k
    return angle_axis
def rotation_matrix_to_angle_axis(rotation_matrix):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert rotation matrix to Rodrigues vector
    """
    quaternion = rotation_matrix_to_quaternion(rotation_matrix)
    aa = quaternion_to_angle_axis(quaternion)
    aa[torch.isnan(aa)] = 0.0
    return aa
def process_metadata(filename,
                     intrinsics=[[1.1562489e+03, 0.0000000e+00, 5.1707324e+02],  # 313
                                 [0.0000000e+00, 1.1562522e+03, 5.1809784e+02],
                                 [0.0000000e+00, 0.0000000e+00, 1.0000000e+00]]):
    results = {}
    data = joblib.load(filename)
    keys = list(data.keys())
    for frame_id in tqdm(range(len(keys)), desc="Processing frames"):
        frame = keys[frame_id]
        # print("currently processing " + frame)
        smpl = data[frame]["smpl"]
        mask = data[frame]["mask"]
        cam = data[frame]["camera"]
        frame_pth = data[frame]["frame_path"]
        for player_id in range(len(smpl)):
            combined_id = str(frame_id) + "_" + str(player_id)
            smpl_p = smpl[player_id]
            mask_p = pycocotools.mask.decode(mask[player_id])
            cam_p = cam[player_id]
            frame_pth_p = os.path.join(os.path.join(os.getcwd(), "4D-Humans"), frame_pth)
            # os.mkdir("dataset/wild/monocular/images")
            # os.mkdir("dataset/wild/monocular/masks")
            # _, extension = os.path.splitext(frame_pth_p)

            # Copy the frame and write the decoded segmentation mask.
            image = plt.imread(frame_pth_p)
            plt.imsave("dataset/wild/monocular/images/" + combined_id + ".png", image)
            # print(mask_p.shape)
            cv2.imwrite("dataset/wild/monocular/masks/" + combined_id + ".png", mask_p * 255)
            # print(frame_pth_p)

            # Convert global_orient + 23 body_pose rotation matrices to axis-angle (72-dim SMPL pose).
            converted_poses = []
            global_orient = torch.tensor(smpl_p["global_orient"])
            print(global_orient[0])
            converted_poses.append(rotation_matrix_to_angle_axis(global_orient[0]))
            for pose in smpl_p["body_pose"].astype(float):
                pose = torch.Tensor(pose)
                rotVec = rotation_matrix_to_angle_axis(pose)
                converted_poses.append(rotVec)

            dictionary = {
                "poses": torch.cat(converted_poses).tolist(),
                "betas": smpl_p["betas"].astype(float).tolist(),
                "cam_intrinsics": intrinsics,
                "cam_extrinsics": estimate_extrinsics_cv2(cam_p).tolist(),
            }
            # print(np.array(dictionary["poses"]).shape)
            results[combined_id] = dictionary
    # print(results)
    json_object = json.dumps(results, indent=4)
    with open("/datadrive/sportsnerf/dataset/wild/monocular/metadata.json", "w") as outfile:
        outfile.write(json_object)
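For completeness, a minimal driver for the script above might look like the following (the .pkl path is a placeholder for whatever results file your 4D-Humans run produced, and the directory creation mirrors the commented-out os.mkdir lines):

if __name__ == "__main__":
    # Make sure the output folders exist (the os.mkdir calls above are commented out).
    os.makedirs("dataset/wild/monocular/images", exist_ok=True)
    os.makedirs("dataset/wild/monocular/masks", exist_ok=True)
    # Placeholder path: the tracking results file written by 4D-Humans.
    process_metadata("4D-Humans/outputs/results/demo.pkl")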
Which processing file did you follow for 4D Humans, the wild one or the ZJU-MoCap one? Also, can you mention which data you have, i.e. which parameters are in your SMPL output?
Hi @Dipankar1997161 I followed the wild processing script. Not sure if I fully understand the second question.
What I am asking is: which parameters do you have in your SMPL output, e.g. global_orient and things like that?
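For example, printing the keys and array shapes of one entry in the 4D-Humans output would show this (the .pkl path below is just a placeholder):

import joblib

# Placeholder path: the 4D-Humans tracking results file used in the script above.
data = joblib.load("4D-Humans/outputs/results/demo.pkl")
first_frame = next(iter(data.values()))
smpl_params = first_frame["smpl"][0]  # first detected person
for name, value in smpl_params.items():
    print(name, getattr(value, "shape", value))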
But I will tell you one thing: you did not get any render, and the GT images you got are cropped. Cropped GT images mean the camera values are wrong.
Secondly, no projection in the rendered images means the sampling rays are cast somewhere outside the bounding box, which is generated from the joints you receive from the SMPL model. Also, make sure to check whether the camera values are in world coordinates or camera coordinates. I was training on data in the camera coordinate system and did not get results, but after switching to world coordinates my training was better.
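A quick sanity check along those lines (just a sketch, assuming the metadata.json written by the script above and that cam_extrinsics holds a camera-space translation): project that translation through cam_intrinsics and confirm it lands inside the image; if it does not, the rays will never hit the body bounding box.

import json

import numpy as np

# Path the script above writes its metadata to.
with open("/datadrive/sportsnerf/dataset/wild/monocular/metadata.json") as f:
    meta = json.load(f)

entry = meta["0_0"]                    # first frame, first person, as named by the script above
K = np.array(entry["cam_intrinsics"])  # 3x3 pinhole intrinsics
E = np.array(entry["cam_extrinsics"])  # 4x4, identity rotation + predicted translation
t = E[:3, 3]                           # assumed: subject position in camera coordinates

uv = K @ t                             # pinhole projection to homogeneous pixel coordinates
uv = uv[:2] / uv[2]
print("projected subject centre (pixels):", uv)  # should fall inside the image bounds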