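"""Generate model answers for WildVision-Bench with vLLM and save them as JSONL.

Typical invocation (values shown are the script's own defaults):

    python run_vllm.py \
        --model_name mistralai/Pixtral-12B-2409 \
        --dataset_path WildVision/wildvision-bench \
        --num_gpu 1
"""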
import base64
import json
import os
from io import BytesIO
from pathlib import Path

import datasets
import fire
import tiktoken
import yaml
from datasets import load_dataset
from PIL import Image
from tqdm import tqdm
from vllm import LLM, SamplingParams

from bench_utils import (
    load_model_answers,
    model_name_to_id,
)

workers = []
worker_initiated = False
default_generation_config = {
"temperature": 0.0,
"top_p": 1.0,
"max_new_tokens": 4096,
}
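# When no per-model config exists, main() dumps this dict verbatim, so a fresh
# model_configs/<model_id>/generation_config.yml looks like:
#   max_new_tokens: 4096
#   temperature: 0.0
#   top_p: 1.0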


def encode_image(image: Image.Image, image_format="PNG") -> str:
    """Return the raw base64 string for an image (no JSON quoting)."""
    im_file = BytesIO()
    image.save(im_file, format=image_format)
    im_bytes = im_file.getvalue()
    # json.dumps() would wrap the payload in literal quote characters and
    # corrupt the resulting data URL, so return the base64 string directly.
    return base64.b64encode(im_bytes).decode("utf-8")


def image_to_url(image: Image.Image, image_format="PNG") -> str:
    """Encode a PIL image as a base64 data URL, preferring its native format."""
    image_format = (image.format or image_format).lower()
    return f"data:image/{image_format};base64,{encode_image(image, image_format=image_format)}"
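# Illustrative round trip (not executed by the script): a tiny in-memory image
# becomes a data URL that vLLM's chat API accepts as an OpenAI-style
# "image_url" content part.
# >>> image_to_url(Image.new("RGB", (1, 1)))[:22]
# 'data:image/png;base64,'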


def main(
    dataset_path: str = "WildVision/wildvision-bench",
    dataset_name: str = "vision_bench_0617",
    dataset_split: str = "test",
    worker_addr: str = None,
    model_name: str = "mistralai/Pixtral-12B-2409",
    tokenizer_mode: str = "auto",
    results_dir: str = None,
    LOG_DIR: str = "./logs",
    bench_name: str = "vision_bench_0617",
    num_gpu: int = 1,
    max_model_len: int = None,
):
"""
    Args:
        dataset_path: path of the HuggingFace dataset
        dataset_name: name (config) of the dataset
        dataset_split: split of the dataset to use
        worker_addr: address of a remote worker; currently only validated,
            generation always runs locally through vLLM
        model_name: HuggingFace name of the model to launch
        tokenizer_mode: tokenizer mode passed through to vLLM
        results_dir: directory for model answers; defaults to
            data/{bench_name}/model_answers/
        LOG_DIR: the directory to save the logs
        bench_name: benchmark name used to build the default results_dir
        num_gpu: tensor parallel size for vLLM
        max_model_len: maximum model context length passed to vLLM
    At least one of worker_addr or model_name must be provided.
"""
assert model_name is not None or worker_addr is not None, "Either model_name or worker_addr must be provided"
if results_dir is None:
results_dir = f"data/{bench_name}/model_answers/"
os.environ["WILDVISION_ARENA_LOGDIR"] = LOG_DIR
# try load existing generation configs
config_yml_path = Path(os.path.abspath(__file__)).parent / "model_configs" / model_name_to_id(model_name) / "generation_config.yml"
if config_yml_path.exists():
print(f"Loading existing config from {config_yml_path}")
with open(config_yml_path, "r") as f:
config = yaml.safe_load(f)
else:
        print(f"No existing model-specific config found for {model_name}")
        print(f"Creating a new default config from default_generation_config: {default_generation_config}")
config_yml_path.parent.mkdir(parents=True, exist_ok=True)
with open(config_yml_path, "w") as f:
yaml.dump(default_generation_config, f)
print(f"Created new default config at {config_yml_path}")
config = default_generation_config
print(f"Loaded generation config: {config}")
    # Load the dataset; GPT-3.5's tokenizer is only used as a generic token counter
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
dataset = load_dataset(dataset_path, dataset_name, split=dataset_split)
print(dataset)
existing_answers = load_model_answers(results_dir)
if model_name_to_id(model_name) in existing_answers:
model_existing_answers = existing_answers[model_name_to_id(model_name)]
        def map_fill_existing(item):
            question_id = item['question_id']
            # Reuse cached answers, but regenerate anything that previously errored
            if question_id in model_existing_answers and "ERROR" not in model_existing_answers[question_id]['output']:
                item['output'] = model_existing_answers[question_id]['output']
                item['token_len'] = len(encoding.encode(item['output'], disallowed_special=()))
            else:
                item['output'] = None
                item['token_len'] = None
            return item
        dataset = dataset.map(map_fill_existing, writer_batch_size=200)  # writer_batch_size works around a flaky datasets mapping error
print("Filled existing answers")
to_generate_indices = [i for i, item in enumerate(dataset) if item['output'] is None]
else:
to_generate_indices = list(range(len(dataset)))
print(f"Generating {len(to_generate_indices)} items for {model_name}")
llm = LLM(model=model_name, tokenizer_mode=tokenizer_mode, tensor_parallel_size=num_gpu, max_model_len=max_model_len, trust_remote_code=True)
sampling_params = SamplingParams(
max_tokens=config.get("max_new_tokens", 4096),
top_p=config.get("top_p", 1.0),
temperature=config.get("temperature", 0.0),
)
if len(to_generate_indices) == 0:
print(f"No items to generate for {model_name}")
else:
all_instructions = [dataset[i]['instruction'] for i in to_generate_indices]
new_dataset = datasets.Dataset.from_dict({
"instruction": all_instructions,
})
        def process_messages(item, index):
            # new_dataset only contains rows still to generate, so map its row
            # index back to the original dataset to pair each instruction with
            # its own image (the two indexings only coincide when nothing was
            # filled from existing answers).
            source_item = dataset[to_generate_indices[index]]
            item['messages'] = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": item['instruction']},
                        {"type": "image_url", "image_url": {"url": image_to_url(source_item['image'])}},
                    ],
                },
            ]
            item['messages'] = json.dumps(item['messages'])
            return item
        new_dataset = new_dataset.map(process_messages, num_proc=8, with_indices=True, desc="Processing messages")
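        # Each serialized entry follows the OpenAI chat format that llm.chat()
        # accepts, e.g. (data URL truncated for illustration):
        # [{"role": "user",
        #   "content": [{"type": "text", "text": "Describe the image."},
        #               {"type": "image_url",
        #                "image_url": {"url": "data:image/png;base64,iVBOR..."}}]}]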
all_messages = new_dataset['messages']
all_messages = [json.loads(x) for x in tqdm(all_messages, desc="Loading messages")]
assert not any([x is None for x in all_messages]), "Some messages are None"
        # llm.chat() returns one RequestOutput per conversation; keep the first
        # candidate's text from each
        outputs = llm.chat(all_messages, sampling_params=sampling_params)
        all_outputs = [x.outputs[0].text for x in outputs]
        # Precompute index -> output so assignment is O(1) per row rather than
        # a linear .index() scan for every example
        index_to_output = dict(zip(to_generate_indices, all_outputs))
        def map_assign_output(item, index):
            if index in index_to_output:
                item['output'] = index_to_output[index]
                item['token_len'] = len(encoding.encode(item['output'], disallowed_special=()))
            item['model'] = model_name
            return item
dataset = dataset.map(map_assign_output, num_proc=2, with_indices=True, desc="Assigning outputs", remove_columns=["image"])
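    # Each JSONL record keeps the remaining dataset columns (the image column
    # is dropped above) plus the generated fields, roughly:
    # {"question_id": "...", "instruction": "...", "output": "...",
    #  "token_len": 123, "model": "mistralai/Pixtral-12B-2409"}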
    os.makedirs(results_dir, exist_ok=True)
    results_file = os.path.join(results_dir, model_name_to_id(model_name) + ".jsonl")
    with open(results_file, "w") as f:
        for item in dataset:
            item.pop("image", None)  # the image column survives when nothing new was generated
            f.write(json.dumps(item) + "\n")
print(f"Saved {model_name} answers to {results_file}")
    # No workers are spawned by this script itself; terminate any a caller registered
    for worker in workers:
        worker.terminate()
print("Done")
if __name__ == "__main__":
fire.Fire(main)