Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

would torch.float32 type influence precision as you use RNN in gesture decoder? #42

Open
MengHao666 opened this issue May 28, 2023 · 1 comment

Comments

@MengHao666
Copy link

I found that the results may differ when the RNN decoder is run many times.
I guess this is because the float32 type harms the precision. Could you give some advice, or provide the pretrained model in float64?

@MengHao666
Copy link
Author

The code is like the following. There are 2 test data items. The result when you process both in a for loop is different from the result when you run the program on only one item at a time.

"""Batch-run ZEGGS gesture generation over a list of audio/style pairs.

Repro script: generating gestures for several inputs in one loop appears to
yield different results than running each input in a separate process —
presumably a float32 precision effect in the RNN decoder (TODO confirm).
"""
import json
import sys
import time
from pathlib import Path

from tqdm import tqdm

# Raw string: a plain literal here contains invalid/deprecated backslash
# escape sequences (e.g. "\P", "\S", "\Z") on this Windows path.
sys.path.append(r"E:\PycharmProjects\Speech2Gesture\ZEGGS")
from ZEGGS.generate import generate_gesture

all_data = [
    {
        "audio_path": "../data/clean/012_Happy_1_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/012_Happy_1_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
    {
        "audio_path": "../data/clean/018_Relaxed_2_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/018_Relaxed_2_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
]

temperature = 1.0
seed = 1234
use_gpu = True

# The options file is the same for every item, so load and resolve all
# derived paths ONCE instead of re-reading/parsing it on every iteration.
options_file = "../data/outputs/v1/options.json"
with open(options_file, "r") as f:
    options = json.load(f)

train_options = options["train_opt"]
network_options = options["net_opt"]
paths = options["paths"]

base_path = Path(paths["base_path"])
data_path = base_path / paths["path_processed_data"]
network_path = Path(paths["models_dir"])
output_path = Path(paths["output_dir"])

# Fixed debug output directory (the original None-then-if-None dance was a
# no-op, and the f-string had no placeholders).
results_path = Path(output_path) / "results_service_debug"

for post_data in tqdm(all_data):
    audio_path = post_data["audio_path"]
    style_encoding_type = post_data["style_encoding_type"]
    style = post_data["style"]
    frames = post_data["frames"]
    first_pose = post_data["first_pose"]
    file_name = post_data["file_name"]

    # "example" style encoding expects (bvh_path, frames) tuples; any other
    # encoding passes the style value through unchanged.
    styles = [(Path(style), frames)] if style_encoding_type == "example" else [style]

    # Debug dump of every argument passed to generate_gesture.
    print("*" * 100)
    print(Path(audio_path))
    print(styles)
    print(data_path)
    print(results_path)
    print(style_encoding_type)
    print(file_name)
    print(first_pose)
    print(temperature)
    print(seed)
    print(use_gpu)
    print("\n" * 5)

    generate_gesture(
        audio_file=Path(audio_path),
        styles=styles,
        network_path=network_path,
        data_path=data_path,
        results_path=results_path,
        style_encoding_type=style_encoding_type,
        file_name=file_name,
        first_pose=first_pose,
        temperature=temperature,
        seed=seed,
        use_gpu=use_gpu,
    )

I save a temporary variable to check the results in the 'write_bvh' function, like this:

def write_bvh(
        filename,
        V_root_pos,
        V_root_rot,
        V_lpos,
        V_lrot,
        parents,
        names,
        order,
        dt,
        start_position=None,
        start_rotation=None,
):
    """Write an animation to a BVH file, baking the root trajectory into
    the root joint's local transform first.

    Parameters
    ----------
    filename : str
        Output path; assumed to end in ".bvh" (the debug .npy paths below
        strip the last 4 characters — a different extension would mangle
        them; TODO confirm callers always pass .bvh).
    V_root_pos, V_root_rot :
        Per-frame root position and rotation (quaternion — per the `quat.*`
        calls below).
    V_lpos, V_lrot :
        Per-frame, per-joint local positions and rotations; joint index 0
        is the root.
    parents, names, order, dt :
        Skeleton hierarchy, joint names, Euler channel order, frame time —
        forwarded to `bvh.save` unchanged.
    start_position, start_rotation : optional
        When BOTH are given, the root trajectory is re-based so the first
        frame starts at this pose.
    """
    if start_position is not None and start_rotation is not None:
        # Remove the first frame's root transform from the whole trajectory...
        offset_pos = V_root_pos[0:1].copy()
        offset_rot = V_root_rot[0:1].copy()

        V_root_pos = quat.mul_vec(quat.inv(offset_rot), V_root_pos - offset_pos)
        V_root_rot = quat.mul(quat.inv(offset_rot), V_root_rot)
        # ...then re-apply the requested start pose.
        V_root_pos = (
                quat.mul_vec(start_rotation[np.newaxis], V_root_pos) + start_position[np.newaxis]
        )
        V_root_rot = quat.mul(start_rotation[np.newaxis], V_root_rot)

    # Copy so the caller's arrays are not mutated, then fold the root
    # trajectory into joint 0.
    V_lpos = V_lpos.copy()
    V_lrot = V_lrot.copy()
    V_lpos[:, 0] = quat.mul_vec(V_root_rot, V_lpos[:, 0]) + V_root_pos
    V_lrot[:, 0] = quat.mul(V_root_rot, V_lrot[:, 0])

    # Convert rotations to Euler degrees ONCE; the original recomputed this
    # for the debug dump and again for bvh.save.
    rotations_deg = np.degrees(quat.to_euler(V_lrot, order=order))

    ### save something to debug, change by raphaelmeng
    # np.save serializes to disk and keeps no reference, so the .copy()
    # calls the original made here were unnecessary.
    np.save(filename[:-4] + "_local_poss.npy", V_lpos)
    np.save(filename[:-4] + "_local_rots.npy", rotations_deg)
    ###

    bvh.save(
        filename,
        dict(
            order=order,
            offsets=V_lpos[0],
            names=names,
            frametime=dt,
            parents=parents,
            positions=V_lpos,
            rotations=rotations_deg,
        ),
    )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant