From 6f7792ac12777aa3c9a9627f4d12d868a991a7e0 Mon Sep 17 00:00:00 2001 From: Jonghwan Hyeon Date: Thu, 17 Nov 2022 18:22:50 +0900 Subject: [PATCH] Fix for getting tokenizer in character-based ASR models when using tarred dataset (#5442) Signed-off-by: Jonghwan Hyeon Signed-off-by: Jonghwan Hyeon Signed-off-by: Hainan Xu --- nemo/collections/asr/models/ctc_models.py | 2 -- nemo/collections/asr/models/rnnt_models.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index 8eedc19e88ce..b850943c3632 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -385,7 +385,6 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): if is_concat: dataset = audio_to_text_dataset.get_concat_tarred_dataset( config=config, - tokenizer=self.tokenizer, shuffle_n=shuffle_n, global_rank=self.global_rank, world_size=self.world_size, @@ -394,7 +393,6 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): else: dataset = audio_to_text_dataset.get_tarred_dataset( config=config, - tokenizer=self.tokenizer, shuffle_n=shuffle_n, global_rank=self.global_rank, world_size=self.world_size, diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index e69050964751..1574b933c1b3 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -497,7 +497,6 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): if is_concat: dataset = audio_to_text_dataset.get_concat_tarred_dataset( config=config, - tokenizer=self.tokenizer, shuffle_n=shuffle_n, global_rank=self.global_rank, world_size=self.world_size, @@ -506,7 +505,6 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): else: dataset = audio_to_text_dataset.get_tarred_dataset( config=config, - tokenizer=self.tokenizer, shuffle_n=shuffle_n, global_rank=self.global_rank, world_size=self.world_size,