Skip to content

Commit

Permalink
Make CodeEval respect device_eval_batch_size (#956)
Browse files (browse the repository at this point in the history)
  • Loading branch information
josejg authored Mar 1, 2024
1 parent 2436c00 commit d104d16
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 19 deletions.
11 changes: 8 additions & 3 deletions llmfoundry/utils/builders.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -498,10 +498,15 @@ def _validate_cfg(icl_cfg: DictConfig):
icl_cfg.batch_size = default_batch_size
if 'pass_at_k' not in icl_cfg:
icl_cfg.pass_at_k = 1
if 'num_beams' not in icl_cfg:
icl_cfg.num_beams = 20
if 'fewshot_random_seed' not in icl_cfg:
icl_cfg.fewshot_random_seed = 1234
if 'generations_per_sample' not in icl_cfg:
icl_cfg.generations_per_sample = 1

if 'num_beams' in icl_cfg:
raise ValueError(
'num_beams is no longer supported as a top level icl_task parameter.' + \
'Please use generation_kwargs.num_beams instead.')

for icl_cfg in icl_tasks_list:
assert isinstance(icl_cfg, DictConfig)
Expand Down Expand Up @@ -547,7 +552,7 @@ def _validate_cfg(icl_cfg: DictConfig):
destination_path=destination_path,
fewshot_random_seed=icl_cfg.fewshot_random_seed,
pass_at_k=icl_cfg.pass_at_k,
generations_per_sample=icl_cfg.num_beams,
generations_per_sample=icl_cfg.generations_per_sample,
has_categories=icl_cfg.get('has_categories', False),
cot_delimiter=icl_cfg.get('cot_delimiter', ''),
generation_kwargs=icl_cfg.get('generation_kwargs', {}),
Expand Down
16 changes: 8 additions & 8 deletions scripts/eval/yamls/coding_tasks.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,62 +4,62 @@ icl_tasks:
dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_cpp
dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_js
dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_return_simple
dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_return_complex
dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_25
dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_50
dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
-
label: human_eval_75
dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [0]
pass_at_k: 1
num_beams: 5
generations_per_sample: 5
batch_size: 1
icl_task_type: code_evaluation
16 changes: 8 additions & 8 deletions scripts/eval/yamls/tasks.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,62 +178,62 @@ icl_tasks:
# dataset_uri: eval/local_data/programming/human_eval.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_cpp
# dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_js
# dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_return_simple
# dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_return_complex
# dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_25
# dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_50
# dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation
# -
# label: human_eval_75
# dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl
# num_fewshot: [0]
# pass_at_k: 1
# num_beams: 20
# generations_per_sample: 20
# batch_size: 1
# icl_task_type: code_evaluation

0 comments on commit d104d16

Please sign in to comment.