Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[Neural Speed] Fix a blocker on Windows platforms #92

Merged
merged 1 commit into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 3 additions & 17 deletions scripts/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,12 @@
from typing import List, Optional
import subprocess
from transformers import AutoTokenizer
import neural_speed
aahouzi marked this conversation as resolved.
Show resolved Hide resolved

# Translates a user-supplied --model_name into the suffix used when building
# the executable file name ("run_{}"); the lookup falls back to the supplied
# name itself when there is no entry here.
model_maps = {"gpt_neox": "gptneox", "llama2": "llama", "gpt_bigcode": "starcoder"}
# "../build/" joined onto the absolute directory that contains this script
# (the ".." component is kept as-is, not normalized).
build_path = Path(Path(__file__).parent.absolute(), "../build/")


def is_win() -> bool:
    """Return True when the interpreter is running on a Windows platform.

    The check is based on ``sys.platform`` beginning with ``"win"``.
    """
    return sys.platform.startswith('win')


def main(args_in: Optional[List[str]] = None) -> None:
parser = argparse.ArgumentParser(description="main program llm running")
parser.add_argument("--model_name", type=str, help="Model name: String", required=True)
Expand Down Expand Up @@ -130,19 +127,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
args = parser.parse_args(args_in)
print(args)
model_name = model_maps.get(args.model_name, args.model_name)
if is_win():
path = Path(args.build_dir, "./Bin/Release/run_{}.exe".format(model_name))
else:
if args.one_click_run == "True":
import neural_speed
package_path = os.path.dirname(neural_speed.__file__)
path = Path(package_path, "./run_{}".format(model_name))
else:
path = Path(args.build_dir, "./bin/run_{}".format(model_name))

if not path.exists():
print("Please build graph first or select the correct model name.")
sys.exit(1)
package_path = os.path.dirname(neural_speed.__file__)
path = Path(package_path, "./run_{}".format(model_name))

cmd = [path]
cmd.extend(["--model", args.model])
Expand Down
20 changes: 3 additions & 17 deletions scripts/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,12 @@
import argparse
from typing import List, Optional
import subprocess
import neural_speed

# Translates a user-supplied --model_name into the suffix used when building
# the executable file name ("quant_{}"); the lookup falls back to the supplied
# name itself when there is no entry here.
model_maps = {"gpt_neox": "gptneox", "llama2": "llama", "gpt_bigcode": "starcoder"}
# "../build/" joined onto the absolute directory that contains this script
# (the ".." component is kept as-is, not normalized).
build_path = Path(Path(__file__).parent.absolute(), "../build/")


def is_win() -> bool:
    """Return True when the interpreter is running on a Windows platform.

    The check is based on ``sys.platform`` beginning with ``"win"``.
    """
    return sys.platform.startswith('win')


def str2bool(v):
if isinstance(v, bool):
return v
Expand Down Expand Up @@ -100,19 +97,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
args = parser.parse_args(args_in)

model_name = model_maps.get(args.model_name, args.model_name)
if is_win():
path = Path(args.build_dir, "./Bin/Release/quant_{}.exe".format(model_name))
else:
if args.one_click_run == "True":
import neural_speed
package_path = os.path.dirname(neural_speed.__file__)
path = Path(package_path, "./quant_{}".format(model_name))
else:
path = Path(args.build_dir, "./bin/quant_{}".format(model_name))
if not path.exists():
print(path)
print("Please build graph first or select the correct model name.")
sys.exit(1)
package_path = os.path.dirname(neural_speed.__file__)
path = Path(package_path, "./quant_{}".format(model_name))
a32543254 marked this conversation as resolved.
Show resolved Hide resolved

cmd = [path]
cmd.extend(["--model_file", args.model_file])
Expand Down