Skip to content

Commit

Permalink
Fixed defaults of conformer models (#3836)
Browse files Browse the repository at this point in the history
Signed-off-by: Vahid <[email protected]>
  • Loading branch information
VahidooX authored Mar 13, 2022
1 parent a3958b5 commit b31d8aa
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_ctc_bpe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ model:
# You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
tokenizer:
dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)

preprocessor:
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
Expand Down
8 changes: 4 additions & 4 deletions examples/asr/conf/conformer/conformer_transducer_bpe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ model:
shuffle: true
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false
trim_silence: false
max_duration: 16.7 # set for LibriSpeech; you may need to update it for your dataset
min_duration: 0.1
Expand All @@ -58,7 +58,7 @@ model:
shuffle: false
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false

test_ds:
manifest_filepath: null
Expand All @@ -67,12 +67,12 @@ model:
shuffle: false
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false

# You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
tokenizer:
dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)

preprocessor:
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ model:
shuffle: true
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false
trim_silence: false
max_duration: 16.7 # set for LibriSpeech; you may need to update it for your dataset
min_duration: 0.1
Expand All @@ -59,7 +59,7 @@ model:
shuffle: false
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false

test_ds:
manifest_filepath: null
Expand All @@ -68,7 +68,7 @@ model:
shuffle: false
num_workers: 8
pin_memory: false
use_start_end_token: true
use_start_end_token: false

# You may find more detail on how to train a monolingual tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
tokenizer:
Expand Down

0 comments on commit b31d8aa

Please sign in to comment.