diff --git a/tools/ckpts/convert_neox_to_hf.py b/tools/ckpts/convert_neox_to_hf.py index 1d3e2c1f0..35812383e 100644 --- a/tools/ckpts/convert_neox_to_hf.py +++ b/tools/ckpts/convert_neox_to_hf.py @@ -277,6 +277,11 @@ def __init__(self, neox_config): ), "use_parallel_residual": get_key(neox_config, "gpt-j-residual", False), "layer_norm_eps": get_key(neox_config, "layernorm-epsilon", 1e-5), + "intermediate_size": get_key( + neox_config, + "intermediate-size", + 4 * get_key(neox_config, "hidden-size"), + ), } ) hf_config = GPTNeoXConfig(**args)