[Neural Speed] Enable StableLM2-1.6B & StableLM2-Zephyr-1.6B & StableLM-3B (#156)

Co-authored-by: intellinjun <[email protected]>
aahouzi and intellinjun authored Mar 15, 2024
1 parent 8d5fe2d commit 8728765
Showing 15 changed files with 1,095 additions and 21 deletions.
12 changes: 12 additions & 0 deletions docs/supported_models.md
@@ -259,6 +259,18 @@ Neural Speed supports the following models:
<td> </td>
<td>Latest</td>
<td>2048</td>
</tr>
<tr>
<td><a href="https://huggingface.co/stabilityai/stablelm-3b-4e1t" target="_blank" rel="noopener noreferrer">StableLM-3B</a>,
<a href="https://huggingface.co/stabilityai/stablelm-2-1_6b" target="_blank" rel="noopener noreferrer">StableLM2-1_6B</a>
<a href="https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b" target="_blank" rel="noopener noreferrer">StableLM2-Zephyr-1_6B</a></td>
<td>✅</td>
<td> </td>
<td> </td>
<td>✅</td>
<td> </td>
<td> </td>
<td>Latest</td>
</tr>
<tr>
<td><a href="https://huggingface.co/openai/whisper-tiny" target="_blank" rel="noopener noreferrer">Whisper-tiny</a>,
2 changes: 2 additions & 0 deletions neural_speed/__init__.py
@@ -69,6 +69,8 @@ def __import_package(self, model_type):
            import neural_speed.qwen_cpp as cpp_model
        elif model_type == "phi":
            import neural_speed.phi_cpp as cpp_model
        elif model_type == "stablelm":
            import neural_speed.stablelm_cpp as cpp_model
        elif model_type == "whisper":
            import neural_speed.whisper_cpp as cpp_model
        elif model_type == "mixtral":
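With the new branch in place, a StableLM checkpoint loads through the usual high-level API. A usage sketch, assuming the Model.init flow shown in the project README; config.model_type for these checkpoints resolves to "stablelm", which selects neural_speed.stablelm_cpp above:

from transformers import AutoTokenizer
from neural_speed import Model

# One of the checkpoints enabled by this commit.
model_name = "stabilityai/stablelm-2-1_6b"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inputs = tokenizer("Once upon a time", return_tensors="pt").input_ids

# model_type "stablelm" routes __import_package to neural_speed.stablelm_cpp.
model = Model()
model.init(model_name, weight_dtype="int4", compute_dtype="int8")
outputs = model.generate(inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))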
7 changes: 5 additions & 2 deletions neural_speed/application/CMakeLists.txt
@@ -70,6 +70,7 @@ compile_quant(quant_mistral quant_model.cpp mistral llama)
compile_quant(quant_mixtral quant_model.cpp mixtral llama)
compile_quant(quant_qwen quant_model.cpp qwen qwen)
compile_quant(quant_phi quant_model.cpp phi phi)
compile_quant(quant_stablelm quant_model.cpp stablelm stablelm)
compile_quant(quant_whisper quant_whisper.cpp whisper whisper)

# all models running
@@ -93,8 +94,9 @@ set(mymap_polyglot 13)
set(mymap_mistral 14)
set(mymap_qwen 15)
set(mymap_phi 16)
set(mymap_whisper 17)
set(mymap_mixtral 18)
set(mymap_stablelm 17)
set(mymap_whisper 18)
set(mymap_mixtral 19)



@@ -131,6 +133,7 @@ compile_run(run_baichuan main_run.cpp main_pybind.cpp baichuan baichuan)
compile_run(run_mistral main_run.cpp main_pybind.cpp mistral llama)
compile_run(run_qwen main_run.cpp main_pybind.cpp qwen qwen)
compile_run(run_phi main_run.cpp main_pybind.cpp phi phi)
compile_run(run_stablelm main_run.cpp main_pybind.cpp stablelm stablelm)
compile_run(run_mixtral main_run.cpp main_pybind.cpp mixtral llama)

# speech recognition
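The mymap_* values become the MODEL_NAME_ID compile-time definition that main_pybind.cpp switches on, so inserting stablelm at 17 shifts whisper and mixtral up by one. A sketch of the resulting table, expressed in Python for illustration (the source of truth is the CMake above):

# MODEL_NAME_ID assignments after this commit, taken from the CMake diff.
MODEL_NAME_ID = {
    "mistral": 14,
    "qwen": 15,
    "phi": 16,
    "stablelm": 17,  # new entry
    "whisper": 18,   # shifted from 17
    "mixtral": 19,   # shifted from 18
}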
6 changes: 5 additions & 1 deletion neural_speed/application/main_pybind.cpp
@@ -911,10 +911,14 @@ PYBIND11_MODULE(phi_cpp, m)

#elif MODEL_NAME_ID == 17

PYBIND11_MODULE(whisper_cpp, m)
PYBIND11_MODULE(stablelm_cpp, m)

#elif MODEL_NAME_ID == 18

PYBIND11_MODULE(whisper_cpp, m)

#elif MODEL_NAME_ID == 19

PYBIND11_MODULE(mixtral_cpp, m)

#endif
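Every per-model binary compiles this same main_pybind.cpp with a different MODEL_NAME_ID, so each extension exposes an identical pybind11 interface under a model-specific module name. A quick sketch of checking that invariant from Python (the "Model" attribute name is an assumption, not verified against the bindings):

import importlib

# Each name below corresponds to one PYBIND11_MODULE branch above.
for name in ("phi_cpp", "stablelm_cpp", "whisper_cpp", "mixtral_cpp"):
    mod = importlib.import_module(f"neural_speed.{name}")
    # Assumption: the bindings expose a Model class; adjust if the
    # actual attribute differs.
    print(name, hasattr(mod, "Model"))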
2 changes: 1 addition & 1 deletion neural_speed/application/whisper_pybind.cpp
@@ -454,7 +454,7 @@ void Model::inference(const std::string& fname_inp) {
return;
}

#if MODEL_NAME_ID == 17
#if MODEL_NAME_ID == 18

PYBIND11_MODULE(whisper_cpp, m)
#endif
8 changes: 4 additions & 4 deletions neural_speed/convert/__init__.py
@@ -29,14 +29,12 @@
}


def convert_model(model, outfile, outtype="f32", model_hub="huggingface", use_quantized_model=False):
def convert_model(model, outfile, outtype="f32", format="NE", model_hub="huggingface", use_quantized_model=False):
    if model_hub == "modelscope":
        from modelscope import AutoConfig
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
    else:
        from transformers import AutoConfig
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)

    config = AutoConfig.from_pretrained(model, trust_remote_code=True)
    model_type = model_maps.get(config.model_type, config.model_type)

    if use_quantized_model:
@@ -47,6 +45,8 @@ def convert_model(model, outfile, outtype="f32", model_hub="huggingface", use_quantized_model=False):
cmd.extend(["python", path])
cmd.extend(["--outfile", outfile])
cmd.extend(["--outtype", outtype])
if model_type in {"phi", "stablelm"}:
cmd.extend(["--format", format])
cmd.extend(["--model_hub", model_hub])
cmd.extend([model])

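The new format argument is forwarded to the converter as --format, and per the diff it only applies to the "phi" and "stablelm" model types. A usage sketch, assuming "NE" (the default) selects Neural Speed's native format and that other values such as "GGUF" are accepted (an assumption, not confirmed by this diff):

from neural_speed.convert import convert_model

# Convert a StableLM2 checkpoint to an f32 Neural Speed binary.
# "--format NE" is only appended for model_type in {"phi", "stablelm"}.
convert_model(
    "stabilityai/stablelm-2-1_6b",
    "stablelm-2-1_6b-f32.bin",
    outtype="f32",
    format="NE",
)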
