Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

would torch.float32 type influence precision as you use RNN in gesture decoder? #42

Open
MengHao666 opened this issue May 28, 2023 · 1 comment

Comments

@MengHao666
Copy link

I found that the results may differ when the RNN decoder is run many times.
I guess this is because the float32 type harms the precision. Could you give some advice, or provide the pretrained model in float64?

@MengHao666
Copy link
Author

The code is like the following. There are 2 test data items. The result when you process both in a for loop is different from the result when you run the program on only one item at a time.

"""Batch-run ZEGGS gesture generation over a list of audio/style pairs.

Repro script: generating gestures for several inputs in one loop appears to
yield different results than running each input in a separate process —
presumably a float32 precision effect in the RNN decoder (TODO confirm).
"""
import json
import sys
import time
from pathlib import Path

from tqdm import tqdm

# Raw string: a plain literal here contains invalid/deprecated backslash
# escape sequences (e.g. "\P", "\S", "\Z") on this Windows path.
sys.path.append(r"E:\PycharmProjects\Speech2Gesture\ZEGGS")
from ZEGGS.generate import generate_gesture

all_data = [
    {
        "audio_path": "../data/clean/012_Happy_1_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/012_Happy_1_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
    {
        "audio_path": "../data/clean/018_Relaxed_2_x_1_0.wav",
        "style_encoding_type": "example",
        "style": "../data/clean/018_Relaxed_2_x_1_0.bvh",
        "frames": None,
        "first_pose": None,
        "file_name": None,
    },
]

temperature = 1.0
seed = 1234
use_gpu = True

# The options file is the same for every item, so load and resolve all
# derived paths ONCE instead of re-reading/parsing it on every iteration.
options_file = "../data/outputs/v1/options.json"
with open(options_file, "r") as f:
    options = json.load(f)

train_options = options["train_opt"]
network_options = options["net_opt"]
paths = options["paths"]

base_path = Path(paths["base_path"])
data_path = base_path / paths["path_processed_data"]
network_path = Path(paths["models_dir"])
output_path = Path(paths["output_dir"])

# Fixed debug output directory (the original None-then-if-None dance was a
# no-op, and the f-string had no placeholders).
results_path = Path(output_path) / "results_service_debug"

for post_data in tqdm(all_data):
    audio_path = post_data["audio_path"]
    style_encoding_type = post_data["style_encoding_type"]
    style = post_data["style"]
    frames = post_data["frames"]
    first_pose = post_data["first_pose"]
    file_name = post_data["file_name"]

    # "example" style encoding expects (bvh_path, frames) tuples; any other
    # encoding passes the style value through unchanged.
    styles = [(Path(style), frames)] if style_encoding_type == "example" else [style]

    # Debug dump of every argument passed to generate_gesture.
    print("*" * 100)
    print(Path(audio_path))
    print(styles)
    print(data_path)
    print(results_path)
    print(style_encoding_type)
    print(file_name)
    print(first_pose)
    print(temperature)
    print(seed)
    print(use_gpu)
    print("\n" * 5)

    generate_gesture(
        audio_file=Path(audio_path),
        styles=styles,
        network_path=network_path,
        data_path=data_path,
        results_path=results_path,
        style_encoding_type=style_encoding_type,
        file_name=file_name,
        first_pose=first_pose,
        temperature=temperature,
        seed=seed,
        use_gpu=use_gpu,
    )

I save a temporary variable to check the results in the 'write_bvh' function, like this:

def write_bvh(
        filename,
        V_root_pos,
        V_root_rot,
        V_lpos,
        V_lrot,
        parents,
        names,
        order,
        dt,
        start_position=None,
        start_rotation=None,
):
    """Write an animation to a BVH file, baking the root trajectory into
    the root joint's local transform first.

    Parameters
    ----------
    filename : str
        Output path; assumed to end in ".bvh" (the debug .npy paths below
        strip the last 4 characters — a different extension would mangle
        them; TODO confirm callers always pass .bvh).
    V_root_pos, V_root_rot :
        Per-frame root position and rotation (quaternion — per the `quat.*`
        calls below).
    V_lpos, V_lrot :
        Per-frame, per-joint local positions and rotations; joint index 0
        is the root.
    parents, names, order, dt :
        Skeleton hierarchy, joint names, Euler channel order, frame time —
        forwarded to `bvh.save` unchanged.
    start_position, start_rotation : optional
        When BOTH are given, the root trajectory is re-based so the first
        frame starts at this pose.
    """
    if start_position is not None and start_rotation is not None:
        # Remove the first frame's root transform from the whole trajectory...
        offset_pos = V_root_pos[0:1].copy()
        offset_rot = V_root_rot[0:1].copy()

        V_root_pos = quat.mul_vec(quat.inv(offset_rot), V_root_pos - offset_pos)
        V_root_rot = quat.mul(quat.inv(offset_rot), V_root_rot)
        # ...then re-apply the requested start pose.
        V_root_pos = (
                quat.mul_vec(start_rotation[np.newaxis], V_root_pos) + start_position[np.newaxis]
        )
        V_root_rot = quat.mul(start_rotation[np.newaxis], V_root_rot)

    # Copy so the caller's arrays are not mutated, then fold the root
    # trajectory into joint 0.
    V_lpos = V_lpos.copy()
    V_lrot = V_lrot.copy()
    V_lpos[:, 0] = quat.mul_vec(V_root_rot, V_lpos[:, 0]) + V_root_pos
    V_lrot[:, 0] = quat.mul(V_root_rot, V_lrot[:, 0])

    # Convert rotations to Euler degrees ONCE; the original recomputed this
    # for the debug dump and again for bvh.save.
    rotations_deg = np.degrees(quat.to_euler(V_lrot, order=order))

    ### save something to debug, change by raphaelmeng
    # np.save serializes to disk and keeps no reference, so the .copy()
    # calls the original made here were unnecessary.
    np.save(filename[:-4] + "_local_poss.npy", V_lpos)
    np.save(filename[:-4] + "_local_rots.npy", rotations_deg)
    ###

    bvh.save(
        filename,
        dict(
            order=order,
            offsets=V_lpos[0],
            names=names,
            frametime=dt,
            parents=parents,
            positions=V_lpos,
            rotations=rotations_deg,
        ),
    )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant