ubisoft-laforge-ZeroEGGS
ubisoft-laforge-ZeroEGGS copied to clipboard
Does the torch.float32 dtype affect precision when an RNN is used in the gesture decoder?
I found that the results can differ when the RNN decoder is run multiple times. I suspect this is because the float32 dtype limits precision. Could you give some advice, or provide the pretrained model in float64?
The code is as follows. There are two test inputs. The result produced when looping over both inputs differs from the result produced when running the program on each input separately.
import json
import sys
import time
from pathlib import Path

from tqdm import tqdm

# Raw string: the original plain literal relied on invalid escapes (\P, \S, \Z)
# being kept verbatim, which is deprecated — r"..." is unambiguous on Windows paths.
sys.path.append(r"E:\PycharmProjects\Speech2Gesture\ZEGGS")
from ZEGGS.generate import generate_gesture

# One dict per generation request; style is given as an example BVH clip.
all_data = [
    {
        "audio_path": "../data/clean/012_Happy_1_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/012_Happy_1_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
    {
        "audio_path": "../data/clean/018_Relaxed_2_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/018_Relaxed_2_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
]

temperature = 1.0
seed = 1234
use_gpu = True

# The options file and all derived paths are loop-invariant: load them once
# instead of re-reading and re-parsing the JSON on every iteration.
options_file = "../data/outputs/v1/options.json"
with open(options_file, "r") as f:
    options = json.load(f)

train_options = options["train_opt"]
network_options = options["net_opt"]
paths = options["paths"]
base_path = Path(paths["base_path"])
data_path = base_path / paths["path_processed_data"]
network_path = Path(paths["models_dir"])
output_path = Path(paths["output_dir"])
# No placeholder was interpolated, so a plain string replaces the f-string;
# the dead `results_path = None` / `if None` pattern is collapsed.
results_path = Path(output_path) / "results_service_debug"

for post_data in tqdm(all_data):
    audio_path = post_data["audio_path"]
    style_encoding_type = post_data["style_encoding_type"]
    style = post_data["style"]
    frames = post_data["frames"]
    first_pose = post_data["first_pose"]
    file_name = post_data["file_name"]

    # "example" styles are (bvh_path, frames) pairs; anything else is passed through.
    styles = [(Path(style), frames)] if style_encoding_type == "example" else [style]

    print("*" * 100)
    print(Path(audio_path))
    print(styles)
    print(data_path)
    print(results_path)
    print(style_encoding_type)
    print(file_name)
    print(first_pose)
    print(temperature)
    print(seed)
    print(use_gpu)
    print("\n" * 5)

    generate_gesture(
        audio_file=Path(audio_path),
        styles=styles,
        network_path=network_path,
        data_path=data_path,
        results_path=results_path,
        style_encoding_type=style_encoding_type,
        file_name=file_name,
        first_pose=first_pose,
        temperature=temperature,
        seed=seed,
        use_gpu=use_gpu,
    )
To check the results, I saved some temporary arrays inside the `write_bvh` function, like this:
def write_bvh(
    filename,
    V_root_pos,
    V_root_rot,
    V_lpos,
    V_lrot,
    parents,
    names,
    order,
    dt,
    start_position=None,
    start_rotation=None,
):
    """Write an animation to a BVH file, optionally re-rooted onto a start pose.

    Parameters
    ----------
    filename : str
        Output path; assumed to end in a 4-char extension (".bvh") — the debug
        .npy files reuse ``filename[:-4]`` as their stem.
    V_root_pos, V_root_rot
        Per-frame root position and root rotation (quaternion) arrays.
    V_lpos, V_lrot
        Per-frame local joint positions / rotations; joint index 0 is the root.
    parents, names, order, dt
        Skeleton parent indices, joint names, Euler order string, frame time.
    start_position, start_rotation : optional
        When BOTH are given, the root trajectory is first expressed relative to
        its own first frame, then transplanted onto this start transform.
    """
    if start_position is not None and start_rotation is not None:
        # Subtract the first frame's root transform ...
        offset_pos = V_root_pos[0:1].copy()
        offset_rot = V_root_rot[0:1].copy()

        V_root_pos = quat.mul_vec(quat.inv(offset_rot), V_root_pos - offset_pos)
        V_root_rot = quat.mul(quat.inv(offset_rot), V_root_rot)
        # ... then re-apply the requested start transform.
        V_root_pos = (
            quat.mul_vec(start_rotation[np.newaxis], V_root_pos) + start_position[np.newaxis]
        )
        V_root_rot = quat.mul(start_rotation[np.newaxis], V_root_rot)

    # Copy before baking the root transform into joint 0 so the caller's
    # arrays are not mutated in place.
    V_lpos = V_lpos.copy()
    V_lrot = V_lrot.copy()
    V_lpos[:, 0] = quat.mul_vec(V_root_rot, V_lpos[:, 0]) + V_root_pos
    V_lrot[:, 0] = quat.mul(V_root_rot, V_lrot[:, 0])

    # Convert quaternions to Euler degrees ONCE; the original computed this
    # twice (debug dump + bvh.save) — identical result, double the work.
    rotations_deg = np.degrees(quat.to_euler(V_lrot, order=order))

    ### save something to debug, added by raphaelmeng
    # np.save serializes the array contents itself, so the explicit .copy()
    # calls the original made here were unnecessary.
    np.save(filename[:-4] + "_local_poss.npy", V_lpos)
    np.save(filename[:-4] + "_local_rots.npy", rotations_deg)
    ###

    bvh.save(
        filename,
        dict(
            order=order,
            offsets=V_lpos[0],
            names=names,
            frametime=dt,
            parents=parents,
            positions=V_lpos,
            rotations=rotations_deg,
        ),
    )