ubisoft-laforge-ZeroEGGS icon indicating copy to clipboard operation
ubisoft-laforge-ZeroEGGS copied to clipboard

would torch.float32 type influence precision as you use RNN in gesture decoder?

Open MengHao666 opened this issue 1 year ago • 1 comments

I found that the results might differ when the RNN decoder is run many times. I guess this is because the float32 type harms the precision. Could you give some advice, or provide the pretrained model in float64?

MengHao666 avatar May 28 '23 16:05 MengHao666

The code is like the following. There are 2 test data items: the result when you process both in a single for loop is different from the result when you run the program on only one data item at a time.

import json
import sys
import time
from pathlib import Path

from tqdm import tqdm

# Raw string: in the original, "\P", "\S" and "\Z" were invalid escape
# sequences (DeprecationWarning); the raw literal has the same value.
sys.path.append(r"E:\PycharmProjects\Speech2Gesture\ZEGGS")
from ZEGGS.generate import generate_gesture

# Each entry describes one generation request: the speech audio, how the
# style is encoded, and the style reference (a BVH example clip here).
all_data = [
    {
        "audio_path": "../data/clean/012_Happy_1_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/012_Happy_1_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
    {
        "audio_path": "../data/clean/018_Relaxed_2_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/018_Relaxed_2_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
]

temperature = 1.0
seed = 1234
use_gpu = True

# The options file and every path derived from it are loop-invariant:
# load them once instead of re-reading the JSON on every iteration.
options_file = "../data/outputs/v1/options.json"
with open(options_file, "r") as f:
    options = json.load(f)

train_options = options["train_opt"]
network_options = options["net_opt"]
paths = options["paths"]

base_path = Path(paths["base_path"])
data_path = base_path / paths["path_processed_data"]
network_path = Path(paths["models_dir"])
output_path = Path(paths["output_dir"])

# Originally `results_path = None` immediately followed by an always-true
# `if results_path is None:` — collapsed to a single assignment. The
# f-string had no placeholders, so a plain literal is identical.
results_path = Path(output_path) / "results_service_debug"

for post_data in tqdm(all_data):
    audio_path = post_data["audio_path"]
    style_encoding_type = post_data["style_encoding_type"]
    style = post_data["style"]
    frames = post_data["frames"]
    first_pose = post_data["first_pose"]
    file_name = post_data["file_name"]

    # "example" styles are (bvh_path, frames) pairs; otherwise pass the
    # raw style value through unchanged.
    styles = [(Path(style), frames)] if style_encoding_type == "example" else [style]

    print("*" * 100)
    print(Path(audio_path))
    print(styles)
    print(data_path)
    print(results_path)
    print(style_encoding_type)
    print(file_name)
    print(first_pose)
    print(temperature)
    print(seed)
    print(use_gpu)
    print("\n" * 5)

    generate_gesture(
        audio_file=Path(audio_path),
        styles=styles,
        network_path=network_path,
        data_path=data_path,
        results_path=results_path,
        style_encoding_type=style_encoding_type,
        file_name=file_name,
        first_pose=first_pose,
        temperature=temperature,
        seed=seed,
        use_gpu=use_gpu,
    )

I save a temporary variable to check the results in the 'write_bvh' function, as follows:

def write_bvh(
        filename,
        V_root_pos,
        V_root_rot,
        V_lpos,
        V_lrot,
        parents,
        names,
        order,
        dt,
        start_position=None,
        start_rotation=None,
):
    """Bake the root trajectory into the local joint transforms and save a BVH.

    When both `start_position` and `start_rotation` are given, the root
    trajectory is first re-expressed relative to its own first frame and
    then re-anchored at the requested starting transform. The (rotated,
    translated) root is then written into joint 0 of the local pose arrays
    before saving. Input arrays are copied; the caller's data is untouched.
    """
    if start_position is not None and start_rotation is not None:
        # Remove the first-frame root transform, then re-apply the
        # requested starting transform in its place.
        first_pos = V_root_pos[0:1].copy()
        first_rot_inv = quat.inv(V_root_rot[0:1].copy())
        rel_pos = quat.mul_vec(first_rot_inv, V_root_pos - first_pos)
        rel_rot = quat.mul(first_rot_inv, V_root_rot)

        start_rot = start_rotation[np.newaxis]
        V_root_pos = quat.mul_vec(start_rot, rel_pos) + start_position[np.newaxis]
        V_root_rot = quat.mul(start_rot, rel_rot)

    # Work on copies so the caller's arrays are not mutated; fold the root
    # transform into joint 0.
    V_lpos = V_lpos.copy()
    V_lrot = V_lrot.copy()
    V_lpos[:, 0] = quat.mul_vec(V_root_rot, V_lpos[:, 0]) + V_root_pos
    V_lrot[:, 0] = quat.mul(V_root_rot, V_lrot[:, 0])

    # Rotations in degrees, in the requested channel order — used both for
    # the debug dump and the BVH file itself.
    euler_deg = np.degrees(quat.to_euler(V_lrot, order=order))

    # Debug dumps (added by raphaelmeng) so intermediate results can be
    # compared across runs.
    np.save(filename[:-4] + "_local_poss.npy", V_lpos.copy())
    np.save(filename[:-4] + "_local_rots.npy", euler_deg.copy())

    bvh.save(
        filename,
        dict(
            order=order,
            offsets=V_lpos[0],
            names=names,
            frametime=dt,
            parents=parents,
            positions=V_lpos,
            rotations=euler_deg,
        ),
    )

MengHao666 avatar May 29 '23 14:05 MengHao666