Replace num_beams with generations_per_sample #971

Closed · wants to merge 7 commits
80 changes: 48 additions & 32 deletions llmfoundry/utils/builders.py
@@ -67,15 +67,16 @@ def build_evaluators(
     logger_keys = []
     eval_gauntlet_callback = None
     if icl_tasks_config is not None:
-        icl_evaluators, logger_keys, eval_gauntlet_callback = build_icl_data_and_gauntlet(
-            icl_tasks_config,
-            eval_gauntlet_config,
-            tokenizer,
-            device_eval_batch_size,
-            icl_seq_len,
-            fewshot_random_seed,
-            icl_subset_num_batches,
-        )
+        icl_evaluators, logger_keys, eval_gauntlet_callback = (
+            build_icl_data_and_gauntlet(
+                icl_tasks_config,
+                eval_gauntlet_config,
+                tokenizer,
+                device_eval_batch_size,
+                icl_seq_len,
+                fewshot_random_seed,
+                icl_subset_num_batches,
+            ))
         evaluators.extend(icl_evaluators)
 
     return evaluators, logger_keys, eval_gauntlet_callback
@@ -131,15 +132,16 @@ def build_icl_data_and_gauntlet(
     device_eval_batch_size: int,
     icl_seq_len: int,
     fewshot_random_seed: Optional[int] = 1234,
-    icl_subset_num_batches: Optional[int] = None
+    icl_subset_num_batches: Optional[int] = None,
 ) -> Tuple[List[Evaluator], List[str], Optional[EvalGauntlet]]:
     icl_evaluators, logger_keys = build_icl_evaluators(
         icl_tasks_config,
         tokenizer,
         icl_seq_len,
         device_eval_batch_size,
         fewshot_random_seed=fewshot_random_seed,
-        icl_subset_num_batches=icl_subset_num_batches)
+        icl_subset_num_batches=icl_subset_num_batches,
+    )
     eval_gauntlet_cb = None
     if eval_gauntlet_config is not None:
         if isinstance(eval_gauntlet_config, str):
@@ -172,9 +174,10 @@ def build_callback(
     elif name == 'memory_snapshot':
         return MemorySnapshot(**kwargs)
     elif name == 'speed_monitor':
-        return SpeedMonitor(window_size=kwargs.get('window_size', 1),
-                            gpu_flops_available=kwargs.get(
-                                'gpu_flops_available', None))
+        return SpeedMonitor(
+            window_size=kwargs.get('window_size', 1),
+            gpu_flops_available=kwargs.get('gpu_flops_available', None),
+        )
     elif name == 'fdiff':
         return FDiffMetrics(**kwargs)
     elif name == 'runtime_estimator':
@@ -349,8 +352,9 @@ def _extract_param_groups(
             group_params.update(param_group_config)
 
             log.debug(
-                f'Creating optimizer param_group with parameters: {param_names} ' +\
-                f'(extracted using {str_match=}). The param_group optimizer ' +\
+                f'Creating optimizer param_group with parameters: {param_names} '
+                +
+                f'(extracted using {str_match=}). The param_group optimizer ' +
                 f'setting overrides are: {param_group_config}.')
 
             params.append(group_params)
@@ -400,7 +404,8 @@ def build_tokenizer(
     os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1'
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 
-    signal_file_path = f'.node_{dist.get_node_rank()}_local_rank0_completed_tokenizer_setup'
+    signal_file_path = (
+        f'.node_{dist.get_node_rank()}_local_rank0_completed_tokenizer_setup')
 
     if dist.is_available() and dist.is_initialized(
     ) and dist.get_world_size() > 1:
@@ -502,8 +507,16 @@ def _validate_cfg(icl_cfg: DictConfig):
            icl_cfg.batch_size = default_batch_size
        if 'pass_at_k' not in icl_cfg:
            icl_cfg.pass_at_k = 1
-       if 'num_beams' not in icl_cfg:
-           icl_cfg.num_beams = 20
+       if 'generations_per_sample' not in icl_cfg:
+           icl_cfg.generations_per_sample = 20
+       if 'num_beams' in icl_cfg:
+           icl_cfg.generations_per_sample = icl_cfg.num_beams
+           warnings.warn(
+               ('`num_beams` is deprecated and will be removed in a future release. '
+                'To specify keyword arguments for `model.generate()`, please use '
+                '`generation_kwargs` with your corresponding values.'),
+               DeprecationWarning,
+           )
 
     for icl_cfg in icl_tasks_list:
         assert isinstance(icl_cfg, DictConfig)
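
The shim above keeps old configs working: a lingering `num_beams` value is copied into `generations_per_sample` and a `DeprecationWarning` is emitted. A minimal standalone sketch of that behavior (using `omegaconf`, which llm-foundry configs are built on; this is an illustration, not code from the repository):

```python
# Standalone sketch of the back-compat shim; mirrors the diff above
# but is not the repository's code or test suite.
import warnings

from omegaconf import DictConfig, OmegaConf


def apply_num_beams_shim(icl_cfg: DictConfig) -> None:
    """Replicate the `_validate_cfg` back-compat logic from the diff."""
    if 'generations_per_sample' not in icl_cfg:
        icl_cfg.generations_per_sample = 20
    if 'num_beams' in icl_cfg:
        # The legacy key wins, so old configs keep their behavior.
        icl_cfg.generations_per_sample = icl_cfg.num_beams
        warnings.warn(
            '`num_beams` is deprecated and will be removed in a future '
            'release. To specify keyword arguments for `model.generate()`, '
            'please use `generation_kwargs` with your corresponding values.',
            DeprecationWarning,
        )


cfg = OmegaConf.create({'label': 'human_eval', 'num_beams': 5})
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    apply_num_beams_shim(cfg)

assert cfg.generations_per_sample == 5
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```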
@@ -550,31 +563,34 @@ def _validate_cfg(icl_cfg: DictConfig):
                fewshot_random_seed=icl_cfg.get('fewshot_random_seed',
                                                fewshot_random_seed),
                pass_at_k=icl_cfg.pass_at_k,
-               generations_per_sample=icl_cfg.num_beams,
+               generations_per_sample=icl_cfg.generations_per_sample,
                has_categories=icl_cfg.get('has_categories', False),
                cot_delimiter=icl_cfg.get('cot_delimiter', ''),
                generation_kwargs=icl_cfg.get('generation_kwargs', {}),
                early_stopping_criteria=early_stopping_criteria,
-               do_normalization=icl_cfg.get('do_normalization', True))
-           if hasattr(
-                   icl_cfg,
-                   'has_categories') and icl_cfg.has_categories and isinstance(
-                       dataloaders, dict):
+               do_normalization=icl_cfg.get('do_normalization', True),
+           )
+           if (hasattr(icl_cfg, 'has_categories') and
+                   icl_cfg.has_categories and isinstance(dataloaders, dict)):
                for category in dataloaders.keys():
                    logger_keys.extend([
                        f'metrics/{label}/{category}/{m}' for m in metric_names
                    ])
                    evaluators.append(
-                       Evaluator(label=f'{label}/{category}',
-                                 dataloader=dataloaders[category],
-                                 metric_names=metric_names),)
+                       Evaluator(
+                           label=f'{label}/{category}',
+                           dataloader=dataloaders[category],
+                           metric_names=metric_names,
+                       ),)
            else:
                logger_keys.extend(
                    [f'metrics/{label}/{m}' for m in metric_names])
                evaluators.append(
-                   Evaluator(label=label,
-                             dataloader=dataloaders,
-                             metric_names=metric_names,
-                             subset_num_batches=icl_subset_num_batches))
+                   Evaluator(
+                       label=label,
+                       dataloader=dataloaders,
+                       metric_names=metric_names,
+                       subset_num_batches=icl_subset_num_batches,
+                   ))
 
     return evaluators, logger_keys
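
With this change, the number of completions drawn per prompt (`generations_per_sample`) is decoupled from the decoding strategy: anything intended for `model.generate()`, including beam-search settings, now belongs under `generation_kwargs`, which the builder forwards via `icl_cfg.get('generation_kwargs', {})`. A sketch of the old and new config styles (key names come from this diff; the decoding values are illustrative, not defaults):

```python
from omegaconf import OmegaConf

# Old style: num_beams did double duty as "number of generations".
legacy_task = OmegaConf.create({
    'label': 'human_eval',
    'icl_task_type': 'code_evaluation',
    'pass_at_k': 1,
    'num_beams': 5,  # deprecated; triggers the shim above
})

# New style: the sample count and the decoding strategy are independent.
new_task = OmegaConf.create({
    'label': 'human_eval',
    'icl_task_type': 'code_evaluation',
    'pass_at_k': 1,
    'generations_per_sample': 5,
    # Optional: forwarded to model.generate(); illustrative values only.
    'generation_kwargs': {'do_sample': True, 'temperature': 0.8},
})
```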
16 changes: 8 additions & 8 deletions scripts/eval/yamls/coding_tasks.yaml
@@ -4,62 +4,62 @@ icl_tasks:
   dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_cpp
   dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_js
   dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_simple
   dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_complex
   dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_25
   dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_50
   dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_75
   dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
-  num_beams: 5
+  generations_per_sample: 5
   batch_size: 1
   icl_task_type: code_evaluation
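
For context on why these entries pair `pass_at_k: 1` with `generations_per_sample: 5`: pass@k for code tasks is conventionally estimated from `n = generations_per_sample` completions per problem using the unbiased estimator of Chen et al. (2021), `pass@k = 1 - C(n-c, k) / C(n, k)` averaged over problems, where `c` counts completions that pass the unit tests. A sketch of that standard estimator (not necessarily the exact form llm-foundry's metric implements):

```python
from math import comb


def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k estimator from Chen et al. (2021).

    n: generations drawn per problem (generations_per_sample)
    c: generations that passed the unit tests
    k: evaluation budget (pass_at_k)
    """
    if n - c < k:
        # Fewer failures than the budget: at least one success is guaranteed.
        return 1.0
    return 1.0 - comb(n - c, k) / comb(n, k)


# With 5 generations of which 1 passes, pass@1 is 1/5.
print(pass_at_k(n=5, c=1, k=1))  # 0.2
```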