This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

[DOC] Add modelscope example #1578

Merged · 8 commits · Jun 14, 2024
24 changes: 24 additions & 0 deletions examples/modelscope/README.md
@@ -0,0 +1,24 @@
# ModelScope with ITREX

Intel® Extension for Transformers (ITREX) supports almost all LLMs in PyTorch format from ModelScope, such as Phi, Qwen, ChatGLM, Baichuan, Gemma, etc.

## Usage Example

ITREX provides an example script that demonstrates inference with ModelScope models. For better performance, bind the process to physical cores with `numactl`, replacing `num_cores` below with the number of cores to use:
```bash
OMP_NUM_THREADS=num_cores numactl -l -C 0-num_cores-1 python run_modelscope_example.py --model=qwen/Qwen-7B --prompt=你好
```
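As a concrete sketch of filling in that placeholder (assuming a Linux machine where `nproc` from coreutils reports the available core count), the snippet below only prints the fully substituted command rather than running it:

```shell
# Derive the core count automatically instead of hard-coding num_cores.
# Echo the command so it can be inspected before launching a real run.
CORES=$(nproc)
echo "OMP_NUM_THREADS=${CORES} numactl -l -C 0-$((CORES-1)) python run_modelscope_example.py --model=qwen/Qwen-7B --prompt=你好"
```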

## Supported and Validated Models
We have validated the majority of existing models with `modelscope==1.13.1`:
Contributor comment: please add the requirements.txt

* [qwen/Qwen-7B](https://www.modelscope.cn/models/qwen/Qwen-7B/summary)
* [ZhipuAI/ChatGLM-6B](https://www.modelscope.cn/models/ZhipuAI/ChatGLM-6B/summary)(transformers=4.33.1)
* [ZhipuAI/chatglm2-6b](https://www.modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)(transformers=4.33.1)
* [ZhipuAI/chatglm3-6b](https://www.modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)(transformers=4.33.1)
* [baichuan-inc/Baichuan2-7B-Chat](https://www.modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat/summary)(transformers=4.33.1)
* [baichuan-inc/Baichuan2-13B-Chat](https://www.modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary)(transformers=4.33.1)
* [LLM-Research/Phi-3-mini-4k-instruct](https://www.modelscope.cn/models/LLM-Research/Phi-3-mini-4k-instruct/summary)
* [LLM-Research/Phi-3-mini-128k-instruct](https://www.modelscope.cn/models/LLM-Research/Phi-3-mini-128k-instruct/summary)
* [AI-ModelScope/gemma-2b](https://www.modelscope.cn/models/AI-ModelScope/gemma-2b/summary)

If you encounter any problems, please let us know.
13 changes: 13 additions & 0 deletions examples/modelscope/requirements.txt
@@ -0,0 +1,13 @@
intel_extension_for_transformers
neural-speed
lm-eval
sentencepiece
gguf
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.3.0+cpu
transformers
intel_extension_for_pytorch==2.3.0
tiktoken
transformers_stream_generator
zipfile38
modelscope
30 changes: 30 additions & 0 deletions examples/modelscope/run_modelscope_example.py
@@ -0,0 +1,30 @@
from transformers import TextStreamer
from modelscope import AutoTokenizer
from intel_extension_for_transformers.transformers import AutoModelForCausalLM
from typing import List, Optional
import argparse

def main(args_in: Optional[List[str]] = None) -> None:
    parser = argparse.ArgumentParser()
    # --model is required, so it carries no default.
    parser.add_argument("--model", type=str, required=True, help="ModelScope model_id or local model path")
    parser.add_argument(
        "-p",
        "--prompt",
        type=str,
        help="Prompt to start generation with: String",
        default="你好,你可以做点什么?",  # "Hello, what can you do?"
    )
    parser.add_argument("--benchmark", action="store_true")
    parser.add_argument("--use_neural_speed", action="store_true")
    args = parser.parse_args(args_in)
    print(args)
    model_name = args.model  # ModelScope model_id or local model path
    prompt = args.prompt
    # model_hub="modelscope" tells ITREX to fetch weights from ModelScope instead of Hugging Face.
    model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, model_hub="modelscope")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    inputs = tokenizer(prompt, return_tensors="pt").input_ids
    # TextStreamer prints tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer)
    model.generate(inputs, streamer=streamer, max_new_tokens=300)

if __name__ == "__main__":
    main()
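The script's command-line interface can be exercised without downloading any model weights; this sketch reproduces just the argument parsing shown above (the `build_parser` helper name is introduced here for illustration only):

```python
import argparse

def build_parser() -> argparse.ArgumentParser:
    # Mirrors the parser in run_modelscope_example.py: a required model id,
    # an optional prompt, and two boolean flags.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True, help="ModelScope model_id or local model path")
    parser.add_argument("-p", "--prompt", type=str, default="你好,你可以做点什么?")
    parser.add_argument("--benchmark", action="store_true")
    parser.add_argument("--use_neural_speed", action="store_true")
    return parser

args = build_parser().parse_args(["--model", "qwen/Qwen-7B"])
print(args.model)             # qwen/Qwen-7B
print(args.use_neural_speed)  # False
```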
@@ -322,6 +322,7 @@ class _BaseQBitsAutoModelClass:
"whisper",
"qwen2",
"gemma",
"phi3",
"tinyllama",
]
