tgi.py
import os
import json
import logging
import requests
import argparse
from tqdm import tqdm
from multiprocessing.pool import Pool
"""
def run_eval(
input_prompt: str,
hf_token: str,
api_url: str,
max_new_tokens: int,
temperature: float,
top_p: float,
logger: logging.Logger = None,
):
"""


def run_eval(inputs):
    """Send a single generation request to a TGI endpoint.

    `inputs` is a tuple of (input_prompt, hf_token, api_url, max_new_tokens,
    temperature, top_p, logger) so the function can be used with
    multiprocessing.Pool.imap. Returns (generated_text, success).
    """
    input_prompt, hf_token, api_url, max_new_tokens, temperature, top_p, logger = inputs
    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "do_sample": True,
            "top_p": top_p,
            "temperature": temperature,
            "best_of": 1,
            # "stop":
        },
    }
    headers = {
        "Content-Type": "application/json",
    }
    response = requests.post(
        api_url,
        headers=headers,
        data=json.dumps(data),
        auth=("hf", hf_token),
        stream=False,
    )
    if response.status_code == 200:
        response_data = response.json()
        generated_text = response_data[0]["generated_text"]
        return generated_text, True
    else:
        logger.error(f"Request failed with status code: {response.status_code}")
        return "", False


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seed",
        default=0,
        type=int,
        help="Seed for the random number generator.",
    )
    parser.add_argument(
        "--max_new_tokens",
        default=20,
        type=int,
        help="Maximum number of new tokens to generate.",
    )
    parser.add_argument(
        "--top_p",
        default=0.95,
        type=float,
        help="top_p value for nucleus sampling.",
    )
    parser.add_argument(
        "--temperature",
        default=0.2,
        type=float,
        help="Temperature of the generation.",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()
    logger = logging.getLogger(__name__)

    prompts = [
        "What is the capital of Cameroon?",
        "What is the capital of Burundi?",
        "What is the capital of Ethiopia?",
        "What is the capital of Rwanda?",
    ]

    hf_token = os.environ.get("HF_TOKEN", "<YOUR TOKEN HERE>")
    api_url = os.environ.get(
        "API_URL",
        "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf",
    )

    # One argument tuple per prompt, matching the run_eval signature.
    inputs = [
        (prompt, hf_token, api_url, args.max_new_tokens, args.temperature, args.top_p, logger)
        for prompt in prompts
    ]

    # Send the requests in parallel; imap preserves prompt order.
    with Pool(12) as pool:
        review_jsons = list(tqdm(pool.imap(run_eval, inputs), total=len(prompts)))
    print(f"REVIEW {review_jsons}")