This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[Neural Speed] Improvements to run.py script #87

Merged
merged 7 commits on Feb 21, 2024
scripts/convert.py: 13 additions & 1 deletion
@@ -13,8 +13,10 @@
 # limitations under the License.
 
 import argparse
+import sys
 from pathlib import Path
 from typing import List, Optional
+from huggingface_hub import snapshot_download
 from neural_speed.convert import convert_model
 
 def main(args_in: Optional[List[str]] = None) -> None:
@@ -25,14 +27,24 @@ def main(args_in: Optional[List[str]] = None) -> None:
         help="output format, default: f32",
         default="f32",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
     parser.add_argument("--outfile", type=Path, required=True, help="path to write to")
     parser.add_argument("model", type=Path, help="directory containing model file or model id")
     args = parser.parse_args(args_in)
 
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        except Exception as e:
+            if e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     convert_model(dir_model, args.outfile, args.outtype)
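For reference, the gated-download pattern this hunk adds (it reappears in scripts/run.py below) can be sketched as a standalone helper. This is a minimal illustration, not code from the PR: the helper name is made up, and the 401 check is hardened with getattr, since a bare Exception (a network failure, a mistyped repo id) may not carry a response attribute at all.

import sys
from typing import Optional

from huggingface_hub import snapshot_download

def resolve_model(model_id: str, token: Optional[str] = None) -> str:
    """Return a local directory for model_id, downloading from the Hub if needed."""
    try:
        return snapshot_download(repo_id=model_id, token=token)
    except Exception as e:
        # Gated repos (e.g. LLaMa2) answer 401 without a valid token; other
        # failures may not expose a `response` attribute, hence getattr.
        response = getattr(e, "response", None)
        if response is not None and response.status_code == 401:
            print(f"An access token is required for {model_id}; "
                  "pass it with --token or download the weights locally.")
        sys.exit(str(e))

# Hypothetical usage; the repo id and token are placeholders:
# local_dir = resolve_model("meta-llama/Llama-2-7b-hf", token="hf_...")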
scripts/inference.py: 3 additions & 6 deletions
@@ -133,12 +133,9 @@ def main(args_in: Optional[List[str]] = None) -> None:
     if is_win():
         path = Path(args.build_dir, "./Bin/Release/run_{}.exe".format(model_name))
     else:
-        if args.one_click_run == "True":
-            import neural_speed
-            package_path = os.path.dirname(neural_speed.__file__)
-            path = Path(package_path, "./run_{}".format(model_name))
-        else:
-            path = Path(args.build_dir, "./bin/run_{}".format(model_name))
+        import neural_speed
+        package_path = os.path.dirname(neural_speed.__file__)
+        path = Path(package_path, "./run_{}".format(model_name))
 
     if not path.exists():
         print("Please build graph first or select the correct model name.")
scripts/quantize.py: 3 additions & 6 deletions
@@ -103,12 +103,9 @@ def main(args_in: Optional[List[str]] = None) -> None:
     if is_win():
         path = Path(args.build_dir, "./Bin/Release/quant_{}.exe".format(model_name))
     else:
-        if args.one_click_run == "True":
-            import neural_speed
-            package_path = os.path.dirname(neural_speed.__file__)
-            path = Path(package_path, "./quant_{}".format(model_name))
-        else:
-            path = Path(args.build_dir, "./bin/quant_{}".format(model_name))
+        import neural_speed
+        package_path = os.path.dirname(neural_speed.__file__)
+        path = Path(package_path, "./quant_{}".format(model_name))
     if not path.exists():
         print(path)
         print("Please build graph first or select the correct model name.")
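With the --one_click_run branch removed, both inference.py and quantize.py now always resolve their native binary from the installed neural_speed package on non-Windows platforms. A minimal sketch of the shared resolution logic, written here for illustration (the helper name is mine, and is_win stands in for the scripts' own platform check):

import os
import sys
from pathlib import Path

def is_win() -> bool:
    # Stand-in for the scripts' platform check.
    return sys.platform.startswith("win")

def binary_path(prefix: str, model_name: str, build_dir: Path) -> Path:
    # Windows builds still live under the build tree; elsewhere the binary
    # ships alongside the installed neural_speed package.
    if is_win():
        return Path(build_dir, "./Bin/Release/{}_{}.exe".format(prefix, model_name))
    import neural_speed
    package_path = os.path.dirname(neural_speed.__file__)
    return Path(package_path, "./{}_{}".format(prefix, model_name))

# Hypothetical usage:
# path = binary_path("run", "llama", Path("../build"))
# if not path.exists():
#     print("Please build graph first or select the correct model name.")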
scripts/run.py: 17 additions & 5 deletions
@@ -18,6 +18,7 @@
 from typing import List, Optional
 from transformers import AutoConfig
 import subprocess
+from huggingface_hub import snapshot_download
 
 model_maps = {"gpt_neox": "gptneox", "gpt_bigcode": "starcoder"}
 build_path = Path(Path(__file__).parent.absolute(), "../build/")
@@ -146,13 +147,24 @@ def main(args_in: Optional[List[str]] = None) -> None:
         action="store_true",
         help="Use ring-buffer and thus do not re-computing after reaching ctx_size (default: False)",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
 
     args = parser.parse_args(args_in)
 
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        # Handles missing token ID for gated models
+        except Exception as e:
+            if e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     parent_path = Path(__file__).parent.absolute()
     config = AutoConfig.from_pretrained(dir_model)
@@ -166,8 +178,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
     convert_cmd = ["python", path]
     convert_cmd.extend(["--outfile", Path(work_path, "ne_{}_f32.bin".format(model_type))])
    convert_cmd.extend(["--outtype", "f32"])
-    convert_cmd.append(args.model)
-    print("convert model ...")
+    convert_cmd.append(dir_model)
+    print("Convert model ...")
     subprocess.run(convert_cmd)
 
     # 2. quantize
@@ -186,7 +198,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         quant_cmd.extend(["--use_ggml"])
     quant_cmd.extend(["--build_dir", args.build_dir])
     quant_cmd.extend(["--one_click_run", "True"])
-    print("quantize model ...")
+    print("Quantize model ...")
     subprocess.run(quant_cmd)
 
     # 3. inference
@@ -208,7 +220,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         infer_cmd.extend(["--shift-roped-k"])
     if (model_type == "baichuan" or model_type == "qwen"):
         infer_cmd.extend(["--tokenizer", dir_model])
-    print("inferce model ...")
+    print("Inference model ...")
     subprocess.run(infer_cmd)
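With these changes, run.py accepts either a local model directory or a Hugging Face model id and drives the same convert, quantize, and inference pipeline. A hypothetical end-to-end invocation; the model id and token are placeholders, and only the positional model argument and --token are taken from the diff above:

import subprocess

# Placeholders: substitute a real Hub model id (or a local directory) and token.
cmd = [
    "python", "scripts/run.py",
    "meta-llama/Llama-2-7b-hf",  # gated model id; a local path also works
    "--token", "hf_...",         # omit for ungated models or local weights
]
subprocess.run(cmd)  # convert -> quantize -> inference, as orchestrated above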