Yarn feature (#97)
* Add yarn scaling parameter

* Add yarn scaling parameter in convert scripts
xiguiw authored Feb 22, 2024
1 parent 33ffaf0 commit 8c846d6
Showing 14 changed files with 59 additions and 0 deletions.
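The three new header fields correspond to the optional "rope_scaling" block in a Hugging Face config.json; in this commit the converters below simply hard-code them to zero, i.e. YaRN is written as disabled. A minimal, hypothetical sketch of how the values could be derived instead of hard-coded — the helper name pack_yarn_fields and its defaults are illustrative and are not part of this change:

```python
import struct

def pack_yarn_fields(hparams):
    """Illustrative only: derive the three YaRN header fields from a
    Hugging Face config.json-style dict. The converters in this commit
    write constant zeros for these fields instead."""
    rope_scaling = hparams.get("rope_scaling") or {}
    factor = float(rope_scaling.get("factor", 0.0))
    orig_ctx = int(rope_scaling.get("original_max_position_embeddings", 0))
    use_yarn = 1 if rope_scaling.get("type") == "yarn" else 0
    return (struct.pack("f", factor) +      # rope_scaling.factor
            struct.pack("i", orig_ctx) +    # original_max_position_embeddings
            struct.pack("i", use_yarn))     # 1 when rope_scaling.type == "yarn"
```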
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_baichuan.py
@@ -160,6 +160,10 @@ def baichuan13B_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_bloom.py
@@ -103,6 +103,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_chatglm.py
@@ -363,6 +363,10 @@ def chatglm2_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_falcon.py
@@ -111,6 +111,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_gptj.py
@@ -103,6 +103,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_gptneox.py
@@ -117,6 +117,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_llama.py
@@ -1093,6 +1093,10 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
self.fout.write(struct.pack("f", params.rope_theta))
self.fout.write(struct.pack("f", params.rope_scale))

self.fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
self.fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
self.fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

# TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
# but bos_token_id = 1 in llama.cpp
self.fout.write(struct.pack("i", params.bos_token_id))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_mistral.py
@@ -1066,6 +1066,10 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
self.fout.write(struct.pack("f", params.rope_theta))
self.fout.write(struct.pack("f", params.rope_scale))

self.fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
self.fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
self.fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

self.fout.write(
struct.pack("i", 1)
)
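Unlike the other converters, the llama and mistral writers above already receive a Params object, so the same three writes could in principle be populated from it rather than hard-coded. A speculative sketch — the attribute names rope_scaling_factor, original_max_position_embeddings, and rope_scaling_type are assumptions for illustration, not fields this commit adds to Params:

```python
import struct

def write_yarn_fields(fout, params):
    # Illustrative sketch; this commit writes constant zeros for these fields.
    factor = float(getattr(params, "rope_scaling_factor", 0.0) or 0.0)
    orig_ctx = int(getattr(params, "original_max_position_embeddings", 0) or 0)
    use_yarn = 1 if getattr(params, "rope_scaling_type", None) == "yarn" else 0
    fout.write(struct.pack("f", factor))    # rope_scaling.factor
    fout.write(struct.pack("i", orig_ctx))  # rope_scaling.original_max_position_embeddings
    fout.write(struct.pack("i", use_yarn))  # 1 when rope_scaling.type == "yarn"
```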
5 changes: 5 additions & 0 deletions neural_speed/convert/convert_mpt.py
@@ -98,6 +98,11 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", hparams.get("rms_norm_eps", 1e-6))) # rms norm eps
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_opt.py
@@ -110,6 +110,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_qwen.py
@@ -116,6 +116,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.special_tokens['<|endoftext|>']))
fout.write(struct.pack("i", tokenizer.special_tokens['<|endoftext|>']))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
4 changes: 4 additions & 0 deletions neural_speed/convert/convert_starcoder.py
@@ -114,6 +114,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("f", 10000.0)) # freq_base
fout.write(struct.pack("f", 1.0)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))

fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
7 changes: 7 additions & 0 deletions neural_speed/models/model_utils/model_files.h
@@ -1099,6 +1099,10 @@ struct model_file_loader {
file.read_raw(&hparams.rms_norm_eps, sizeof(float));
file.read_raw(&hparams.freq_base, sizeof(float));
file.read_raw(&hparams.freq_scale, sizeof(float));

file.read_raw(&hparams.rope_scaling_factor, sizeof(float));
hparams.original_max_position_embeddings = file.read_u32();
hparams.use_yarn = file.read_u32();
}

void read_ne_vocab() {
@@ -1219,6 +1223,9 @@ struct model_file_saver {
file.write_raw(&hparams.rms_norm_eps, sizeof(float));
file.write_raw(&hparams.freq_base, sizeof(float));
file.write_raw(&hparams.freq_scale, sizeof(float));
file.write_raw(&hparams.rope_scaling_factor, sizeof(float));
file.write_u32(hparams.original_max_position_embeddings);
file.write_u32(hparams.use_yarn);
}
void write_vocab() {
if (any_file_loader->file_version == MODEL_FILE_VERSION_NE) {
3 changes: 3 additions & 0 deletions neural_speed/models/model_utils/model_types.h
@@ -139,6 +139,9 @@ struct model_hparams {

// ChatGLM-1
int32_t inner_hidden_size = 0;
float rope_scaling_factor = 0.0f;
int32_t original_max_position_embeddings = 0;
int32_t use_yarn = 0;

bool operator!=(const model_hparams& other) const {
return static_cast<bool>(memcmp(this, &other, sizeof(model_hparams)));
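Taken together, the writer changes append 12 bytes to the model file header (a 4-byte float followed by two 4-byte integers) immediately after freq_scale, and model_hparams gains the matching fields. A hedged Python sketch of reading them back, mirroring the read_raw/read_u32 calls in model_files.h (assumes a little-endian host, matching the native byte order used by struct.pack in the converters):

```python
import struct

def read_yarn_fields(f):
    # Mirrors: read_raw(&hparams.rope_scaling_factor, sizeof(float)); read_u32(); read_u32()
    rope_scaling_factor, = struct.unpack("<f", f.read(4))
    original_max_position_embeddings, = struct.unpack("<I", f.read(4))
    use_yarn, = struct.unpack("<I", f.read(4))
    return rope_scaling_factor, original_max_position_embeddings, use_yarn
```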