-
Notifications
You must be signed in to change notification settings - Fork 1
/
sample-config-openai.yaml
121 lines (111 loc) · 2.92 KB
/
sample-config-openai.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Hydra-style defaults list. Each `configs@<package>: <name>` entry pulls in
# the packaged base config llm_evaluate/configs/<name>.yaml and places it
# under the `<package>` key. `_self_` comes last so that the values written
# in this file are applied on top of those defaults.
defaults:
  - configs@benchmarks: benchmarks
  - configs@datasets: datasets
  - configs@extensions: extensions
  - configs@metrics: metrics
  - configs@model: model
  - configs@model_loaders: model_loaders
  - configs@parsers: parsers
  - _self_
# Inline demo datasets, keyed by dataset name.
# NOTE(review): this section had lost its indentation; the nesting below is
# inferred from the key names. Confirm it against the loader's schema -- in
# particular whether column_input / column_reference belong to the task or
# to the dataset, and whether `data` sits under `metadata`.
datasets:
  # Free-form generation examples.
  demo-chat:
    tasks:
      random-questions:
        task_type: generation
        # These match the `prompt` / `response` keys of the rows under `data`.
        column_input: prompt
        column_reference: response
    description: Dataset for random generative tasks (single)
    metadata:
      source: local
      data:
        - prompt: How to get to Vancouver from Seattle?
          response: Take a plane, bus, train or car.
        - prompt: Generate 3 male names in a list.
          response: One, Two, Three
        # NOTE(review): 4.33 x 10 equals 4.33e1, not 4.33e2 -- the prompt
        # may have lost an exponent. Confirm the intended pair.
        - prompt: Convert 4.33 x 10 to scientific notation.
          # Quoted so the reference stays text; unquoted, YAML 1.2 resolvers
          # read it as the float 433.0.
          response: '4.33e2'
  # Multiple-choice examples scored as classification over letter labels.
  demo-classification:
    tasks:
      random-mcq:
        task_type: classification
        model_output_parser: KeywordParser
        labels:
          - A
          - B
          - C
          - D
          - E
        # E is never offered as an option in the prompts below; presumably
        # it is the label used when no answer can be parsed -- TODO confirm.
        none_value: E
        column_input: prompt
        column_reference: response
    # NOTE(review): the wording looks copied from another dataset (the
    # references here are letters, not numbers); left unchanged.
    description: Dataset for random generative tasks (numeric reference)
    metadata:
      source: local
      data:
        # `|` keeps the line breaks of each question and one trailing newline.
        - prompt: |
            Pick the correct answer for the following question:
            1 + 1 = ?
            A. 0
            B. 1
            C. 2
            D. 3
          response: C
        - prompt: |
            Answer the following multiple choice question:
            2 + 2 = ?
            A. 1
            B. 2
            C. 3
            D. 4
          response: D
        - prompt: |
            Select the correct response for the following multiple choice question:
            1 + 2 = ?
            A. 1
            B. 2
            C. 3
            D. 4
          response: C
# Benchmarks, keyed by benchmark name. The `demo` benchmark lists the two
# demo datasets and, for each of their tasks, the metrics to compute. The
# dataset and task names mirror the keys used under `datasets`.
# NOTE(review): this section had lost its indentation; the nesting below is
# inferred. Confirm that `prompt` is a per-task setting rather than a
# per-dataset one.
benchmarks:
  demo:
    demo-chat:
      tasks:
        random-questions:
          metrics:
            - rouge
          prompt: Answer the following question as concisely as possible.
    demo-classification:
      tasks:
        random-mcq:
          metrics:
            - accuracy
            - precision
            - f1
            - recall
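# Alternative model section: a local Hugging Face model. To use it,
# uncomment this block and comment out the active `model:` section below
# (a mapping must not contain two `model` keys).
# NOTE(review): the nesting was reconstructed from flattened text -- confirm
# before enabling.
# model:
#   model: google/t5-efficient-tiny
#   model_type: hf-automodel
#   tokenizer_args:
#     model_max_length: 3000
#     truncation_side: right
#     truncation: longest_first
#   model_load_args:
#     task_type: T5ForConditionalGeneration
#     device_map: cpu
#     max_input_tokens: 3000
#     torch_dtype: auto
#   model_inference_args:
#     max_new_tokens: 512
#     num_beams: 2
#     do_sample: False
#   add_to_prompt_start: '[Prompt]'
#   add_to_prompt_end: '[Response]'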
# Active model: OpenAI `gpt-4o`.
# NOTE(review): this section had lost its indentation; the nesting below is
# inferred. `max_input_tokens` must be the first child of `model_load_args`
# (that key has no value of its own), but confirm whether `max_new_tokens`
# also belongs there and whether the `add_to_prompt_*` keys are model-level.
model:
  model_type: openai
  model: gpt-4o
  model_load_args:
    max_input_tokens: 4000
    max_new_tokens: 16
  # Quoted because a plain scalar starting with `[` would be parsed as a
  # flow sequence.
  add_to_prompt_start: '[Prompt]'
  add_to_prompt_end: '[Response]'