This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

[DOC] Add modelscope example #1578

Merged: 8 commits, Jun 14, 2024
Changes from 4 commits
24 changes: 24 additions & 0 deletions examples/modelscope/README.md
@@ -0,0 +1,24 @@
# ModelScope with ITREX

Intel Extension for Transformers (ITREX) supports almost all of the LLMs in PyTorch format from ModelScope, such as phi, Qwen, ChatGLM, Baichuan, gemma, etc.
> **Reviewer comment (Contributor):** `phi,Qwen,ChatGLM,Baichuan,gemma,etc.` should be `phi, Qwen, ChatGLM, Baichuan, gemma, etc.` (add spaces after the commas).

## Usage Example

ITREX provides a script that demonstrates the use of ModelScope. Run it with the following command:
```bash
numactl -m 0 -C 0-55 python run_modelscope_example.py --model_path=qwen/Qwen-7B --prompt=你好
```

> **Reviewer comment (Contributor):** `numactl -l -C xx-xx`? What if the user spans sockets? Add a note explaining why `numactl` is added (to improve performance) and teaching users how to bind core IDs.

> **Reviewer comment (Contributor):** Change
> `numactl -m 0 -C 0-55 python run_modelscope_example.py --model_path=qwen/Qwen-7B --prompt=你好`
> to
> `OMP_NUM_THREADS= numactl -m -C python run_modelscope_example.py --model <MODEL_NAME_OR_PATH> --prompt=你好`
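
Addressing the reviewers' request for an explanatory note, a minimal sketch of what it might say, assuming a two-socket machine whose socket 0 exposes physical cores 0-55 (consistent with the range used above):

```bash
# Bind memory allocation (-m) and threads (-C) to NUMA node 0 so the model
# weights stay local to the cores doing the compute; crossing sockets adds
# remote-memory latency. Set OMP_NUM_THREADS to the number of bound cores.
# Assumption: socket 0 exposes physical cores 0-55 on this machine.
OMP_NUM_THREADS=56 numactl -m 0 -C 0-55 python run_modelscope_example.py --model_path=qwen/Qwen-7B --prompt=你好
```

Run `numactl --hardware` to see your machine's node and CPU layout and pick matching `-m`/`-C` values.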

## Supported and Validated Models
We have validated the majority of existing models using `modelscope==1.13.1`:
> **Reviewer comment (Contributor):** Please add the requirements.txt.
* [qwen/Qwen-7B](https://www.modelscope.cn/models/qwen/Qwen-7B/summary)
* [ZhipuAI/ChatGLM-6B](https://www.modelscope.cn/models/ZhipuAI/ChatGLM-6B/summary)
* [ZhipuAI/chatglm2-6b](https://www.modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)
* [ZhipuAI/chatglm3-6b](https://www.modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)
* [baichuan-inc/Baichuan2-7B-Chat](https://www.modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat/summary)
* [baichuan-inc/Baichuan2-13B-Chat](https://www.modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary)
* [LLM-Research/Phi-3-mini-4k-instruct](https://www.modelscope.cn/models/LLM-Research/Phi-3-mini-4k-instruct/summary)
* [LLM-Research/Phi-3-mini-128k-instruct](https://www.modelscope.cn/models/LLM-Research/Phi-3-mini-128k-instruct/summary)
* [AI-ModelScope/gemma-2b](https://www.modelscope.cn/models/AI-ModelScope/gemma-2b/summary)

If you encounter any problems, please let us know.
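
Following the reviewer's request above, a plausible `requirements.txt` for this example; the pinned `modelscope` version comes from the validation note, and the rest is inferred from the script's imports:

```text
intel-extension-for-transformers
modelscope==1.13.1
transformers
torch
```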
30 changes: 30 additions & 0 deletions examples/modelscope/run_modelscope_example.py
@@ -0,0 +1,30 @@
from transformers import TextStreamer
from modelscope import AutoTokenizer
from intel_extension_for_transformers.transformers import AutoModelForCausalLM
from typing import List, Optional
import argparse

def main(args_in: Optional[List[str]] = None) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, help="ModelScope model id or local model path", required=True, default="qwen/Qwen-7B")
    parser.add_argument(
        "-p",
        "--prompt",
        type=str,
        help="Prompt to start generation with: String (default: empty)",
        default="你好,你可以做点什么?",  # "Hello, what can you do?"
    )
    parser.add_argument("--benchmark", action="store_true")  # parsed but unused in this minimal example
    parser.add_argument("--use_neural_speed", action="store_true")  # parsed but unused in this minimal example
    args = parser.parse_args(args_in)
    print(args)
    model_name = args.model_path  # ModelScope model_id or local model
    prompt = args.prompt
    # model_hub="modelscope" makes ITREX fetch the checkpoint from ModelScope
    # instead of the Hugging Face Hub; weights are quantized to 4-bit on load.
    model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, model_hub="modelscope")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    inputs = tokenizer(prompt, return_tensors="pt").input_ids
    streamer = TextStreamer(tokenizer)  # stream tokens to stdout as they are generated
    outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)

if __name__ == "__main__":
    main()
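
For reference, the script can be invoked directly with the README's arguments (NUMA binding omitted):

```bash
python run_modelscope_example.py --model_path=qwen/Qwen-7B --prompt=你好
```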