- Llama-3-8B
Refer to the README.md.

```bash
# 1. create directory
mkdir -p /data/mtt/models /data/mtt/models_convert

# 2. clone model
cd /data/mtt/models
git lfs install
git clone https://www.modelscope.cn/LLM-Research/Meta-Llama-3-8B-Instruct.git
```
If you encounter any issues while downloading the model, please refer to the README.
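If `git clone` over HTTPS stalls or fails, one alternative is to pull the same repository with the ModelScope Python SDK. This is a minimal sketch, not taken from the README: it assumes the `modelscope` package is installed, and note that `snapshot_download` places the files under a namespaced subdirectory of `cache_dir`, so the path passed to `--in_file` below may need to be adjusted accordingly.

```python
# Optional alternative to `git clone`: fetch the model with the ModelScope SDK.
# Assumes `pip install modelscope`; reuses the directory created in step 1.
from modelscope import snapshot_download

model_dir = snapshot_download(
    "LLM-Research/Meta-Llama-3-8B-Instruct",
    cache_dir="/data/mtt/models",
)
print("model downloaded to:", model_dir)  # use this path as --in_file if it differs
```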
- convert model

```bash
python -m mttransformer.convert_weight \
    --in_file /data/mtt/models/Meta-Llama-3-8B-Instruct/ \
    --saved_dir /data/mtt/models_convert/Meta-Llama-3-8B-Instruct-fp16-tp1-convert/ \
    --tensor-para-size 1
```
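Before starting the server, it can be worth confirming that the conversion produced output under `--saved_dir`. The exact files written by `mttransformer.convert_weight` are not documented here, so this sketch only checks that the directory exists and lists its contents:

```python
# Quick sanity check: inspect the converted-weight directory before serving it.
from pathlib import Path

converted = Path("/data/mtt/models_convert/Meta-Llama-3-8B-Instruct-fp16-tp1-convert")
assert converted.is_dir(), f"conversion output not found: {converted}"
for entry in sorted(converted.rglob("*")):
    size = entry.stat().st_size if entry.is_file() else "<dir>"
    print(entry.relative_to(converted), size)
```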
- start server

```bash
python -m vllm.entrypoints.openai.api_server \
    --model /data/mtt/models_convert/Meta-Llama-3-8B-Instruct-fp16-tp1-convert/ \
    --trust-remote-code \
    --tensor-parallel-size 1 \
    --pipeline-parallel-size 1 \
    --block-size 64 \
    --max-model-len 4096 \
    --disable-log-stats \
    --disable-log-requests \
    --gpu-memory-utilization 0.95 \
    --device "musa"
```
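Loading the weights takes a while, so requests sent immediately after launch may fail. One simple way to wait is to poll the OpenAI-compatible `/v1/models` endpoint until it answers; this sketch assumes the `requests` package is installed and uses the same host and port as the curl example below.

```python
# Poll the OpenAI-compatible endpoint until the server has finished loading.
import time
import requests

BASE_URL = "http://0.0.0.0:8000/v1"

for _ in range(120):  # wait up to ~10 minutes
    try:
        r = requests.get(f"{BASE_URL}/models", timeout=5)
        if r.status_code == 200:
            print("server ready, models:", [m["id"] for m in r.json()["data"]])
            break
    except requests.ConnectionError:
        pass
    time.sleep(5)
else:
    raise RuntimeError("vLLM server did not become ready in time")
```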
- send message

```bash
curl http://0.0.0.0:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "/data/mtt/models_convert/Meta-Llama-3-8B-Instruct-fp16-tp1-convert/",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Who won the world series in 2020?"}
        ]
    }'
```
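Because the server exposes an OpenAI-compatible API, the same request can also be sent from Python with the `openai` client. This is a sketch assuming `openai>=1.0` is installed; the API key is not checked by the local server and can be any placeholder.

```python
# Same chat request as the curl example, sent via the openai client.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="/data/mtt/models_convert/Meta-Llama-3-8B-Instruct-fp16-tp1-convert/",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
    ],
)
print(response.choices[0].message.content)
```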