From fdb921154dcf512db6f8bce8a403a63784e88fb5 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Sun, 24 Mar 2024 21:58:27 -0700
Subject: [PATCH 1/3] update doc

---
 docs/supported_models.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/docs/supported_models.md b/docs/supported_models.md
index df8135677..115dd693a 100644
--- a/docs/supported_models.md
+++ b/docs/supported_models.md
@@ -94,6 +94,7 @@ Neural Speed supports the following models:
   </tr>
   <tr>
     <td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1" target="_blank" rel="noopener noreferrer">Mistral-7B</a>,
+     <a href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" target="_blank" rel="noopener noreferrer">Mistral-7B-Instruct-v0.2</a>,
      <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" target="_blank" rel="noopener noreferrer">Mixtral-8x7B</a></td>
     <td>✅</td>
     <td>✅</td>
@@ -402,7 +403,7 @@ Neural Speed supports the following models:
     <td></td>
   </tr>
   <tr>
-    <td><a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.1-GGUF</a>,
+    <td><a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.1-GGUF</a>, <a href="https://huggingface.co/TheBloke/Mistral-7B-v0.2-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.2-GGUF</a></td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
@@ -410,7 +411,7 @@ Neural Speed supports the following models:
     <td></td>
   </tr>
   <tr>
-    <td><a href="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUFF</a>,
+    <td><a href="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF</a></td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
@@ -425,18 +426,16 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td></td>
   </tr>
-    </tr>
     <tr>
-    <td><a href="https://huggingface.co/codellama/CodeLlama-7b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-7B-GGUF</a></td>
+    <td><a href="https://huggingface.co/codellama/CodeLlama-7b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-7B-GGUF</a>, <a href="https://huggingface.co/codellama/CodeLlama-13b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-13B-GGUF</a></td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
     <td></td>
   </tr>
-    </tr>
     <tr>
-    <td><a href="https://huggingface.co/codellama/CodeLlama-13b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-13B-GGUF</a></td>
+    <td><a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat-GGUF</a></td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
@@ -470,7 +469,7 @@ Neural Speed supports the following models:
   </tr>
   <tr>
     <td><a href="https://huggingface.co/Qwen/Qwen-7B-Chat" target="_blank" rel="noopener noreferrer">Qwen-7B-Chat</a>,
-    <a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat-GGUF</a></td>
+    <a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat</a></td>
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>

From 47cd81ff40f7fa32570c2982723292606cd77f90 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Mon, 25 Mar 2024 01:47:06 -0700
Subject: [PATCH 2/3] fixed the script issue

---
 scripts/python_api_example_for_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/python_api_example_for_gguf.py b/scripts/python_api_example_for_gguf.py
index 905071ec4..ccd5d5688 100644
--- a/scripts/python_api_example_for_gguf.py
+++ b/scripts/python_api_example_for_gguf.py
@@ -50,7 +50,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
 
     gguf_path = args.model.as_posix()
 
-    prompt = "Once upon a time"
+    prompt = args.prompt
     tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
     inputs = tokenizer(prompt, return_tensors="pt").input_ids
     streamer = TextStreamer(tokenizer)

From 0ec8399f135c3d60b7acef8da8e0028876d772f7 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Mon, 25 Mar 2024 03:15:11 -0700
Subject: [PATCH 3/3] add the total number of parameters

---
 neural_speed/models/model_utils/model_files.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_speed/models/model_utils/model_files.h b/neural_speed/models/model_utils/model_files.h
index b586d0ea5..1a72d4c02 100644
--- a/neural_speed/models/model_utils/model_files.h
+++ b/neural_speed/models/model_utils/model_files.h
@@ -1138,15 +1138,15 @@ struct model_file_loader {
     printf("%-16s %d.hparams.original_max_position_embeddings = %-30d\n", __func__, count++,
            hparams.original_max_position_embeddings);
     printf("%-16s %d.hparams.use_yarn = %-30d\n", __func__, count++, hparams.use_yarn);
-    unsigned int total = 25;
+    unsigned int total = 26;
     if (count != total) {
-      fprintf(stderr, "The number of ne_parameters is wrong.\n");
+      fprintf(stderr, "The number of ne_parameters is wrong, total = %d, count = %d.\n", total, count);
     }
   }
 
   void load_ne_vocab() {
     unsigned int count = 0;
-    unsigned int ne_hparams_total = 25;
+    unsigned int ne_hparams_total = 26;
     file.read_raw(&vocab.bos_token_id, sizeof(model_vocab::id));
     file.read_raw(&vocab.eos_token_id, sizeof(model_vocab::id));
     file.read_raw(&vocab.pad_token_id, sizeof(model_vocab::id));