[Neural Speed] Enable StableLM2-1.6B & StableLM2-Zephyr-1.6B & StableLM-3B (#156)

Co-authored-by: intellinjun <[email protected]>
aahouzi and intellinjun authored Mar 15, 2024
1 parent 8d5fe2d commit 8728765
Showing 15 changed files with 1,095 additions and 21 deletions.
12 changes: 12 additions & 0 deletions docs/supported_models.md
@@ -259,6 +259,18 @@ Neural Speed supports the following models:
<td> </td>
<td>Latest</td>
<td>2048</td>
</tr>
<tr>
<td><a href="https://huggingface.co/stabilityai/stablelm-3b-4e1t" target="_blank" rel="noopener noreferrer">StableLM-3B</a>,
<a href="https://huggingface.co/stabilityai/stablelm-2-1_6b" target="_blank" rel="noopener noreferrer">StableLM2-1_6B</a>
<a href="https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b" target="_blank" rel="noopener noreferrer">StableLM2-Zephyr-1_6B</a></td>
<td>✅</td>
<td> </td>
<td> </td>
<td>✅</td>
<td> </td>
<td> </td>
<td>Latest</td>
</tr>
<tr>
<td><a href="https://huggingface.co/openai/whisper-tiny" target="_blank" rel="noopener noreferrer">Whisper-tiny</a>,
2 changes: 2 additions & 0 deletions neural_speed/__init__.py
@@ -69,6 +69,8 @@ def __import_package(self, model_type):
            import neural_speed.qwen_cpp as cpp_model
        elif model_type == "phi":
            import neural_speed.phi_cpp as cpp_model
        elif model_type == "stablelm":
            import neural_speed.stablelm_cpp as cpp_model
        elif model_type == "whisper":
            import neural_speed.whisper_cpp as cpp_model
        elif model_type == "mixtral":
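With the new branch in place, a StableLM checkpoint loads through the usual high-level API. A usage sketch, assuming the Model.init flow shown in the project README; config.model_type for these checkpoints resolves to "stablelm", which selects neural_speed.stablelm_cpp above:

from transformers import AutoTokenizer
from neural_speed import Model

# One of the checkpoints enabled by this commit.
model_name = "stabilityai/stablelm-2-1_6b"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inputs = tokenizer("Once upon a time", return_tensors="pt").input_ids

# model_type "stablelm" routes __import_package to neural_speed.stablelm_cpp.
model = Model()
model.init(model_name, weight_dtype="int4", compute_dtype="int8")
outputs = model.generate(inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))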
7 changes: 5 additions & 2 deletions neural_speed/application/CMakeLists.txt
@@ -70,6 +70,7 @@ compile_quant(quant_mistral quant_model.cpp mistral llama)
compile_quant(quant_mixtral quant_model.cpp mixtral llama)
compile_quant(quant_qwen quant_model.cpp qwen qwen)
compile_quant(quant_phi quant_model.cpp phi phi)
compile_quant(quant_stablelm quant_model.cpp stablelm stablelm)
compile_quant(quant_whisper quant_whisper.cpp whisper whisper)

# all models running
@@ -93,8 +94,9 @@ set(mymap_polyglot 13)
set(mymap_mistral 14)
set(mymap_qwen 15)
set(mymap_phi 16)
set(mymap_whisper 17)
set(mymap_mixtral 18)
set(mymap_stablelm 17)
set(mymap_whisper 18)
set(mymap_mixtral 19)



@@ -131,6 +133,7 @@ compile_run(run_baichuan main_run.cpp main_pybind.cpp baichuan baichuan)
compile_run(run_mistral main_run.cpp main_pybind.cpp mistral llama)
compile_run(run_qwen main_run.cpp main_pybind.cpp qwen qwen)
compile_run(run_phi main_run.cpp main_pybind.cpp phi phi)
compile_run(run_stablelm main_run.cpp main_pybind.cpp stablelm stablelm)
compile_run(run_mixtral main_run.cpp main_pybind.cpp mixtral llama)

# speech recognition
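The mymap_* values become the MODEL_NAME_ID compile-time definition that main_pybind.cpp switches on, so inserting stablelm at 17 shifts whisper and mixtral up by one. A sketch of the resulting table, expressed in Python for illustration (the source of truth is the CMake above):

# MODEL_NAME_ID assignments after this commit, taken from the CMake diff.
MODEL_NAME_ID = {
    "mistral": 14,
    "qwen": 15,
    "phi": 16,
    "stablelm": 17,  # new entry
    "whisper": 18,   # shifted from 17
    "mixtral": 19,   # shifted from 18
}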
6 changes: 5 additions & 1 deletion neural_speed/application/main_pybind.cpp
@@ -911,10 +911,14 @@ PYBIND11_MODULE(phi_cpp, m)

#elif MODEL_NAME_ID == 17

PYBIND11_MODULE(whisper_cpp, m)
PYBIND11_MODULE(stablelm_cpp, m)

#elif MODEL_NAME_ID == 18

PYBIND11_MODULE(whisper_cpp, m)

#elif MODEL_NAME_ID == 19

PYBIND11_MODULE(mixtral_cpp, m)

#endif
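Every per-model binary compiles this same main_pybind.cpp with a different MODEL_NAME_ID, so each extension exposes an identical pybind11 interface under a model-specific module name. A quick sketch of checking that invariant from Python (the "Model" attribute name is an assumption, not verified against the bindings):

import importlib

# Each name below corresponds to one PYBIND11_MODULE branch above.
for name in ("phi_cpp", "stablelm_cpp", "whisper_cpp", "mixtral_cpp"):
    mod = importlib.import_module(f"neural_speed.{name}")
    # Assumption: the bindings expose a Model class; adjust if the
    # actual attribute differs.
    print(name, hasattr(mod, "Model"))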
2 changes: 1 addition & 1 deletion neural_speed/application/whisper_pybind.cpp
@@ -454,7 +454,7 @@ void Model::inference(const std::string& fname_inp) {
return;
}

#if MODEL_NAME_ID == 17
#if MODEL_NAME_ID == 18

PYBIND11_MODULE(whisper_cpp, m)
#endif
8 changes: 4 additions & 4 deletions neural_speed/convert/__init__.py
@@ -29,14 +29,12 @@
}


def convert_model(model, outfile, outtype="f32", model_hub="huggingface", use_quantized_model=False):
def convert_model(model, outfile, outtype="f32", format="NE", model_hub="huggingface", use_quantized_model=False):
    if model_hub == "modelscope":
        from modelscope import AutoConfig
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
    else:
        from transformers import AutoConfig
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)

    config = AutoConfig.from_pretrained(model, trust_remote_code=True)
    model_type = model_maps.get(config.model_type, config.model_type)

    if use_quantized_model:
@@ -47,6 +45,8 @@ def convert_model(model, outfile, outtype="f32", model_hub="huggingface", use_quantized_model=False):
cmd.extend(["python", path])
cmd.extend(["--outfile", outfile])
cmd.extend(["--outtype", outtype])
if model_type in {"phi", "stablelm"}:
cmd.extend(["--format", format])
cmd.extend(["--model_hub", model_hub])
cmd.extend([model])

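The new format argument is forwarded to the converter as --format, and per the diff it only applies to the "phi" and "stablelm" model types. A usage sketch, assuming "NE" (the default) selects Neural Speed's native format and that other values such as "GGUF" are accepted (an assumption, not confirmed by this diff):

from neural_speed.convert import convert_model

# Convert a StableLM2 checkpoint to an f32 Neural Speed binary.
# "--format NE" is only appended for model_type in {"phi", "stablelm"}.
convert_model(
    "stabilityai/stablelm-2-1_6b",
    "stablelm-2-1_6b-f32.bin",
    outtype="f32",
    format="NE",
)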
