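"""evaluate.py

Evaluation harness for a custom reward model.

Usage (defaults shown; see the argparse flags at the bottom of this file):
    python evaluate.py --model_path models/reward-model \
        --data_path m2_reward_dataset_example.json
"""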
import argparse
import json

import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import (
    AutoModel,
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
)

# Here we import your ClassifierRewardModelConfig and ClassifierRewardModel
# classes, so we have the implementation of your get_rewards function
# and can load the weights from your saved model.
from model import ClassifierRewardModelConfig, ClassifierRewardModel


def load_json(filename):
    """Load a JSON file."""
    with open(filename, 'r') as read_file:
        data = json.load(read_file)
    return data


def save_dictlist_to_json(mydictlist, filename):
    """Save a list of dictionaries to a JSON file."""
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(mydictlist, f, ensure_ascii=False, indent=4)


class TestDataset(Dataset):
    """Simple dataset module for testing the reward model."""

    def __init__(self, test_ds):
        self.ds = test_ds

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, ix):
        return self.ds[ix]
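
# Each item in the test dataset is expected to be a dict of the form
# {'chat': str, 'label': int}; see Reward.forward below for the fields
# the reward model adds on top of these.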


class Reward(torch.nn.Module):
    """
    Wrapper class for the reward model, which handles loading the model
    and its config, and the forward pass for final predictions.
    """

    def __init__(self, model_path):
        super().__init__()
        # Load the student-defined reward model and its associated config.
        self.config = AutoConfig.from_pretrained(model_path)
        self.model = AutoModel.from_pretrained(model_path, config=self.config)

    def check_reward_type(self, rewards):
        """Check that the rewards are a list of dicts."""
        return isinstance(rewards, list) and all(isinstance(r, dict) for r in rewards)

    def forward(self, demonstrations):
        """
        Get the rewards for the demonstrations.

        Args:
            demonstrations: list of dicts in the format
                {'chat': str, 'label': int}
        Returns:
            rewards: list of dicts in the format
                {'chat': str, 'label': int, 'reward': float, 'prediction': int}
        """
        # ===== Get the rewards from the student's reward model =====
        rewards = self.model.get_rewards(demonstrations)
        # ===== Check the reward format =====
        assert self.check_reward_type(rewards), \
            "The rewards must be a list of dicts"
        assert len(rewards) == len(demonstrations), \
            "The number of rewards must match the number of demonstrations"
        return rewards
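
# For reference, a minimal sketch of the get_rewards contract that
# Reward.forward relies on. This is purely illustrative, not the actual
# implementation in model.py; score_chat is a hypothetical helper:
#
#     def get_rewards(self, demonstrations):
#         rewards = []
#         for demo in demonstrations:
#             score = self.score_chat(demo['chat'])  # assumed scorer
#             rewards.append({
#                 'chat': demo['chat'],
#                 'label': demo['label'],
#                 'reward': float(score),
#                 'prediction': int(score > 0.5),
#             })
#         return rewards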


class Evaluator:
    """Runs the reward model over the test set and reports accuracy."""

    def __init__(self, model_path, ds_test):
        # Load the model and dataset.
        self.load_model(model_path)
        self.ds_test = ds_test
        self.dataset = TestDataset(ds_test)
        self.dataloader = DataLoader(
            self.dataset,
            batch_size=16,
            shuffle=False,
            # Identity collate function: keep each batch as a list of dicts.
            collate_fn=lambda x: x)

    def load_model(self, model_path):
        """Load the reward model from the specified path."""
        self.model = Reward(model_path)
        # Move the model to GPU if one is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def evaluate(self):
        """Evaluate the model on the test dataset."""
        rewards = []
        for batch in tqdm(self.dataloader):
            rewards.extend(self.model(batch))
        # ===== Score the predictions against the ground-truth labels =====
        num_correct = sum(reward['label'] == reward['prediction'] for reward in rewards)
        acc = num_correct / len(self.ds_test)
        print(f"Evaluation Complete. Accuracy: {acc}.")
        return rewards
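
# Note: evaluate() divides by len(self.ds_test), which equals len(rewards)
# here because the dataloader visits every example exactly once
# (shuffle=False, no examples dropped).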


def save_hf_model(hf_model, model_path):
    """Save the model and its config to the specified path."""
    hf_model.save_pretrained(model_path)
    hf_model.config.save_pretrained(model_path)
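
# Note: in recent transformers versions, save_pretrained already writes
# config.json alongside the weights, so the explicit config save above is
# a belt-and-braces step that keeps the directory self-contained.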


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        type=str,
        default="models/reward-model",
        help="Path to the model")
    parser.add_argument(
        "--data_path",
        type=str,
        default="m2_reward_dataset_example.json",
        help="Path to the test dataset")
    args = parser.parse_args()

    hf_pretrained_model_name = "roberta-base"

    # Save the model and its config to the model_path directory.
    model_config = ClassifierRewardModelConfig.from_pretrained(hf_pretrained_model_name)
    model_config.problem_type = 'single_label_classification'
    model = ClassifierRewardModel(model_config)
    save_hf_model(model, args.model_path)

    # Load the dataset.
    reward_dataset = load_json(args.data_path)

    # NOTE: Example of how we will load your model. We register the custom
    # reward model and its config so that the saved checkpoint can be loaded
    # through the AutoConfig and AutoModel classes.
    AutoConfig.register('ClassifierRewardModel', ClassifierRewardModelConfig)
    AutoModel.register(ClassifierRewardModelConfig, ClassifierRewardModel)
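
    # After registration, the saved checkpoint can be loaded generically,
    # mirroring what Reward.__init__ does (illustrative):
    #     config = AutoConfig.from_pretrained(args.model_path)
    #     model = AutoModel.from_pretrained(args.model_path, config=config)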

    # Load the config and model from the model_path directory.
    evaluator = Evaluator(args.model_path, reward_dataset)

    # Evaluate the model on the test dataset.
    rewards_json = evaluator.evaluate()

    # Save the rewards to a JSON file.
    save_dictlist_to_json(rewards_json, 'm2_reward_dataset_syntax-sorcerers_labelled.json')