Skip to content

Commit

Permalink
black
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandros Koumparoulis <[email protected]>
  • Loading branch information
akoumpa authored and conver334 committed Jul 30, 2024
1 parent 91edebd commit af0b145
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __init__(
self.tokens_to_generate = tokens_to_generate
self.memmap_workers = memmap_workers
self.hf_dataset = hf_dataset
self.global_sample_mapping = global_sample_mapping
self.global_sample_mapping = global_sample_mapping
self.truncation_method = truncation_method
self.is_test = is_test
self.output_original_text = output_original_text
Expand Down Expand Up @@ -179,7 +179,11 @@ def _maybe_validate_prompt_template(self):

def _build_samples_mapping(self):
if self.max_num_samples is not None:
osm = OnlineSampleMapping(dataset_size=len(self.indexed_dataset), num_samples=self.max_num_samples) if not self.global_sample_mapping else None
osm = (
OnlineSampleMapping(dataset_size=len(self.indexed_dataset), num_samples=self.max_num_samples)
if not self.global_sample_mapping
else None
)
self.samples_mapping = get_samples_mapping(
indexed_dataset=self.indexed_dataset,
data_prefix=self.file_path,
Expand Down
2 changes: 1 addition & 1 deletion scripts/nlp_language_modeling/prepare_packed_ft_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def tokenize_dataset(cfg: 'DictConfig'):
tokens_to_generate=data_cfg.get('tokens_to_generate', 0),
memmap_workers=data_cfg.get('memmap_workers', None),
hf_dataset=data_cfg.get('hf_dataset', False),
global_sample_mapping=data_cfg.get('global_sample_mapping',False),
global_sample_mapping=data_cfg.get('global_sample_mapping', False),
truncation_method=data_cfg.get('truncation_method', 'right'),
special_tokens=data_cfg.get('chat_prompt_tokens', None),
is_test=True,
Expand Down

0 comments on commit af0b145

Please sign in to comment.