[Tools] Concatenate folder paths in a more secure way.
Signed-off-by: Duyi-Wang <[email protected]>
Duyi-Wang committed Nov 3, 2023
1 parent 7e448f6 commit 185ee3d
Showing 4 changed files with 50 additions and 36 deletions.
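
The change replaces manual string concatenation of directory and file names with os.path.join, which inserts the platform's path separator and does not depend on the caller supplying a trailing slash. A minimal sketch of the difference (not part of the commit; the output directory is a hypothetical example):

import os

saved_dir = "/tmp/converted_model"  # hypothetical output directory

concatenated = saved_dir + "model.wte.bin"         # '/tmp/converted_modelmodel.wte.bin'
joined = os.path.join(saved_dir, "model.wte.bin")  # '/tmp/converted_model/model.wte.bin'
print(concatenated)
print(joined)

In the converter scripts below, the same substitution is applied to every path built from saved_dir: config.ini and the per-tensor .bin files.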
14 changes: 8 additions & 6 deletions tools/chatglm2_convert.py
@@ -16,7 +16,7 @@
from transformers import AutoTokenizer, AutoModel

dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(dir_path + "/../../../..")
sys.path.append(os.path.join(dir_path, "../../../.."))
sys.path.append(dir_path)


@@ -33,7 +33,7 @@ def split_and_convert_process(
i, saved_dir, factor, key, args, val, old_name, dtype, num_attention_heads, multi_query_group_num, kv_channels
):
def save_val(val, key, tp_num=None):
path = saved_dir + "/model." + key
path = os.path.join(saved_dir, "model." + key)
if tp_num is not None:
path += "." + str(tp_num)
path += ".bin"
@@ -151,7 +151,7 @@ def split_and_convert(args):
multi_query_group_num = config["chatglm2"]["kv_head_num"] = str(hf_config["multi_query_group_num"])
config["chatglm2"]["pad_id"] = str(hf_config["pad_token_id"])

with open(saved_dir + "/config.ini", "w") as configfile:
with open(os.path.join(saved_dir, "config.ini"), "w") as configfile:
config.write(configfile)
except Exception as e:
print("Fail to save the config in config.ini.", str(e))
@@ -201,13 +201,15 @@ def split_and_convert(args):
pool = multiprocessing.Pool(args.processes)
for name, param in model_named_parameters.items():
if name == "transformer.embedding.word_embeddings.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(os.path.join(saved_dir, "model.wte.bin"))
elif name == "transformer.encoder.final_layernorm.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.weight.bin"
os.path.join(saved_dir, "model.final_layernorm.weight.bin")
)
elif name == "transformer.output_layer.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
os.path.join(saved_dir, "model.lm_head.weight.bin")
)
else:
starmap_args = []
for i in range(len(huggingface_model_name_pattern)):
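A side note on the sys.path hunk at the top of each file: os.path.join does not normalize the "../../../.." components, so the appended entry is semantically the same as the old concatenated form; the operating system resolves the parent references when modules are imported. A small illustration (not part of the commit; the checkout location is hypothetical):

import os

dir_path = "/repo/examples/model/python/tools"  # hypothetical location of the conversion script
entry = os.path.join(dir_path, "../../../..")   # '/repo/examples/model/python/tools/../../../..'
print(os.path.normpath(entry))                  # '/repo' -- normpath would collapse it eagerly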
20 changes: 11 additions & 9 deletions tools/chatglm_convert.py
@@ -16,7 +16,7 @@
from transformers import AutoTokenizer, AutoModel

dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(dir_path + "/../../../..")
sys.path.append(os.path.join(dir_path, "../../../.."))
sys.path.append(dir_path)


@@ -31,7 +31,7 @@ def get_weight_data_type(data_type):

def split_and_convert_process(i, saved_dir, factor, key, args, val, old_name, dtype):
def save_val(val, key, tp_num=None):
path = saved_dir + "/model." + key
path = os.path.join(saved_dir, "model." + key)
if tp_num is not None:
path += "." + str(tp_num)
path += ".bin"
@@ -130,7 +130,7 @@ def split_and_convert(args):
config["chatglm"]["start_id"] = str(hf_config["bos_token_id"])
config["chatglm"]["end_id"] = str(hf_config["eos_token_id"])
config["chatglm"]["weight_data_type"] = args.weight_data_type
with open(saved_dir + "/config.ini", "w") as configfile:
with open(os.path.join(saved_dir, "config.ini"), "w") as configfile:
config.write(configfile)
except Exception as e:
print("Fail to save the config in config.ini.", str(e))
@@ -181,17 +181,19 @@ def split_and_convert(args):
pool = multiprocessing.Pool(args.processes)
for name, param in model_named_parameters.items():
if name == "transformer.word_embeddings.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(os.path.join(saved_dir, "model.wte.bin"))
elif name == "transformer.final_layernorm.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.weight.bin"
os.path.join(saved_dir, "model.final_layernorm.weight.bin")
)
elif name == "transformer.final_layernorm.bias":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.bias.bin"
os.path.join(saved_dir, "model.final_layernorm.bias.bin")
)
elif name == "lm_head.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
os.path.join(saved_dir, "model.lm_head.weight.bin")
)
else:
starmap_args = []
for i in range(len(huggingface_model_name_pattern)):
@@ -222,8 +224,8 @@ def split_and_convert(args):
torch.multiprocessing.set_sharing_strategy("file_system")

parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-saved_dir', '-o', type=str, help='file name of output file', required=True)
parser.add_argument('-in_file', '-i', type=str, help='file name of input checkpoint file', required=True)
parser.add_argument("-saved_dir", "-o", type=str, help="file name of output file", required=True)
parser.add_argument("-in_file", "-i", type=str, help="file name of input checkpoint file", required=True)
parser.add_argument("-processes", "-p", type=int, help="processes to spawn for conversion (default: 8)", default=8)
parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"])

28 changes: 17 additions & 11 deletions tools/llama_convert.py
@@ -16,7 +16,7 @@
from transformers import LlamaForCausalLM, LlamaTokenizer

dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(dir_path + "/../../../..")
sys.path.append(os.path.join(dir_path, "../../../.."))
sys.path.append(dir_path)


@@ -29,12 +29,14 @@ def get_weight_data_type(data_type):
assert False, f"Invalid weight data type {data_type}"


def split_and_convert_process(i, saved_dir, factor, key, args, val, old_name, dtype, num_attention_heads, num_key_value_heads):
def split_and_convert_process(
i, saved_dir, factor, key, args, val, old_name, dtype, num_attention_heads, num_key_value_heads
):
def save_val(val, key, tp_num=None):
if key.startswith("model."):
path = saved_dir + "/" + key
path = os.path.join(saved_dir, key)
else:
path = saved_dir + "/model." + key
path = os.path.join(saved_dir, "model." + key)

if tp_num is not None:
path += "." + str(tp_num)
@@ -114,7 +116,9 @@ def split_and_convert(args):
try:
config["llama"]["model_name"] = "llama" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
num_attention_heads = config["llama"]["head_num"] = str(hf_config["num_attention_heads"])
num_key_value_heads = config["llama"]["kv_head_num"] = str(hf_config.get("num_key_value_heads", num_attention_heads))
num_key_value_heads = config["llama"]["kv_head_num"] = str(
hf_config.get("num_key_value_heads", num_attention_heads)
)

hidden_size = hf_config["hidden_size"]
config["llama"]["size_per_head"] = str(hidden_size // hf_config["num_attention_heads"])
@@ -129,7 +133,7 @@
config["llama"]["start_id"] = str(hf_config["bos_token_id"])
config["llama"]["end_id"] = str(hf_config["eos_token_id"])
config["llama"]["weight_data_type"] = args.weight_data_type
with open(saved_dir + "/config.ini", "w") as configfile:
with open(os.path.join(saved_dir, "config.ini"), "w") as configfile:
config.write(configfile)
except Exception as e:
print("Fail to save the config in config.ini.", str(e))
@@ -181,16 +185,18 @@ def split_and_convert(args):
pool = multiprocessing.Pool(args.processes)
for name, param in model_named_parameters.items():
if name == "model.embed_tokens.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(os.path.join(saved_dir, "model.wte.bin"))
elif name == "model.norm.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.weight.bin"
os.path.join(saved_dir, "model.final_layernorm.weight.bin")
)
# elif name == 'model.final_layernorm.bias':
# param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
# saved_dir + "model.final_layernorm.bias.bin")
# os.path.join(saved_dir, "model.final_layernorm.bias.bin"))
elif name == "lm_head.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
os.path.join(saved_dir, "model.lm_head.weight.bin")
)
else:
starmap_args = []
for i in range(len(hf_model_name_pattern)):
@@ -208,7 +214,7 @@ def split_and_convert(args):
name,
np_weight_data_type,
num_attention_heads,
num_key_value_heads
num_key_value_heads,
)
)
pool.starmap_async(split_and_convert_process, starmap_args)
24 changes: 14 additions & 10 deletions tools/opt_convert.py
@@ -31,7 +31,7 @@
from transformers.models.opt.modeling_opt import OPTAttention, OPTDecoderLayer

dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(dir_path + "/../../../..")
sys.path.append(os.path.join(dir_path, "../../../.."))
sys.path.append(dir_path)


@@ -47,7 +47,7 @@ def get_weight_data_type(data_type):
# def split_and_convert_process(i, saved_dir, factor, key, args, val, capture_dict, old_name, dtype):
def split_and_convert_process(i, saved_dir, factor, key, args, val, old_name, dtype):
def save_val(val, key, tp_num=None):
path = saved_dir + "/model." + key
path = os.path.join(saved_dir, "model." + key)
if tp_num is not None:
path += "." + str(tp_num)
path += ".bin"
@@ -148,7 +148,7 @@ def split_and_convert(args):
config["gpt"]["end_id"] = str(hf_config["eos_token_id"])
config["gpt"]["weight_data_type"] = args.weight_data_type
# config['gpt']['int8'] = str(save_int8) # really useful?
with open(saved_dir + "/config.ini", "w") as configfile:
with open(os.path.join(saved_dir, "config.ini"), "w") as configfile:
config.write(configfile)
except:
print(f"Fail to save the config in config.ini.")
@@ -216,31 +216,35 @@ def split_and_convert(args):
for name, param in model_named_parameters.items():
if name == "model.decoder.embed_positions.weight":
param[padding_offset:, ...].detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.wpe.bin"
os.path.join(saved_dir, "model.wpe.bin")
)

elif name == "model.decoder.embed_tokens.weight":
if "model.decoder.project_in.weight" in model_named_parameters.keys():
project_in = model_named_parameters["model.decoder.project_in.weight"]
project_out = model_named_parameters["model.decoder.project_out.weight"]
torch.matmul(param, project_in).detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.wte.bin"
os.path.join(saved_dir, "model.wte.bin")
)
torch.matmul(param, project_out).detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.lm_head.weight.bin"
os.path.join(saved_dir, "model.lm_head.weight.bin")
)

else:
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
os.path.join(saved_dir, "model.wte.bin")
)
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
os.path.join(saved_dir, "model.lm_head.weight.bin")
)

elif name == "model.decoder.final_layer_norm.weight":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.weight.bin"
os.path.join(saved_dir, "model.final_layernorm.weight.bin")
)
elif name == "model.decoder.final_layer_norm.bias":
param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
saved_dir + "model.final_layernorm.bias.bin"
os.path.join(saved_dir, "model.final_layernorm.bias.bin")
)
elif "project_in" in name or "project_out" in name:
continue
