Make CodeEval respect device_eval_batch_size #956

Merged: 11 commits, Mar 1, 2024
llmfoundry/utils/builders.py (6 changes: 3 additions & 3 deletions)
@@ -497,10 +497,10 @@ def _validate_cfg(icl_cfg: DictConfig):
             icl_cfg.batch_size = default_batch_size
         if 'pass_at_k' not in icl_cfg:
             icl_cfg.pass_at_k = 1
-        if 'num_beams' not in icl_cfg:
-            icl_cfg.num_beams = 20
         if 'fewshot_random_seed' not in icl_cfg:
             icl_cfg.fewshot_random_seed = 1234
+        if 'generations_per_sample' not in icl_cfg:
+            icl_cfg.generations_per_sample = 1
 
     for icl_cfg in icl_tasks_list:
         assert isinstance(icl_cfg, DictConfig)
@@ -546,7 +546,7 @@ def _validate_cfg(icl_cfg: DictConfig):
                 destination_path=destination_path,
                 fewshot_random_seed=icl_cfg.fewshot_random_seed,
                 pass_at_k=icl_cfg.pass_at_k,
-                generations_per_sample=icl_cfg.num_beams,
+                generations_per_sample=icl_cfg.generations_per_sample,
                 has_categories=icl_cfg.get('has_categories', False),
                 cot_delimiter=icl_cfg.get('cot_delimiter', ''),
                 generation_kwargs=icl_cfg.get('generation_kwargs', {}),
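For context, the default-filling pattern touched by these hunks can be sketched as below. This is a minimal illustration based only on the lines shown above, not the full _validate_cfg from builders.py; the helper name _fill_icl_defaults and the example config values are hypothetical.

from omegaconf import DictConfig, OmegaConf

def _fill_icl_defaults(icl_cfg: DictConfig) -> None:
    # Mirror the defaulting shown in the hunk above: optional ICL eval keys
    # get defaults when the eval YAML omits them.
    if 'pass_at_k' not in icl_cfg:
        icl_cfg.pass_at_k = 1
    if 'fewshot_random_seed' not in icl_cfg:
        icl_cfg.fewshot_random_seed = 1234
    # The PR drops the old num_beams default (20) in favor of
    # generations_per_sample, which defaults to 1.
    if 'generations_per_sample' not in icl_cfg:
        icl_cfg.generations_per_sample = 1

cfg = OmegaConf.create({'label': 'human_eval', 'pass_at_k': 1})
_fill_icl_defaults(cfg)
print(cfg.generations_per_sample)  # 1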
scripts/eval/yamls/coding_tasks.yaml (16 changes: 8 additions & 8 deletions)
@@ -4,62 +4,62 @@ icl_tasks:
   dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_cpp
   dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_js
   dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_simple
   dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_complex
   dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_25
   dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_50
   dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_75
   dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
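A side note on these settings: generations_per_sample (n) needs to be at least pass_at_k (k) for the metric to make sense. As intuition only, here is a rough sketch of the standard unbiased pass@k estimator; it is not the exact code used by the eval harness.

from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    # Probability that at least one of k samples, drawn without replacement
    # from n generations of which c are correct, passes the tests.
    if n - c < k:
        return 1.0
    return 1.0 - comb(n - c, k) / comb(n, k)

# With the YAML above (generations_per_sample: 5, pass_at_k: 1),
# a problem with 2 passing generations would score:
print(pass_at_k(n=5, c=2, k=1))  # 0.4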