# amharic_16000.yaml

# Shared scalar values. Each one is anchored here and aliased further down
# in this file, so changing a value here changes every place that aliases it.
name: &name 'QuartzNet15x5'      # aliased by exp_manager.name
sample_rate: &sample_rate 16000  # aliased under model
repeat: &repeat 1                # aliased by encoder blocks
dropout: &dropout 0.0            # aliased by encoder blocks
separable: &separable true       # aliased by encoder blocks
# Output vocabulary: a single space followed by Ethiopic syllable characters
# (235 entries in total). Anchored as `labels` and aliased by both dataset
# sections and by the decoder's `vocabulary`.
# NOTE(review): entry order presumably fixes the class index of each symbol,
# so do not reorder or de-duplicate without retraining — confirm.
labels: &labels [' ', ሀ, ሁ, ሂ, ሃ, ሄ, ህ, ሆ, ለ, ሉ,
                      ሊ, ላ, ሌ, ል, ሎ, ሏ, ሐ, ሑ, ሒ, ሔ, ሕ, መ, ሙ,
                      ሚ, ማ, ሜ, ም, ሞ, ሟ, ሠ, ሡ, ሣ, ሥ, ሦ, ረ, ሩ,
                      ሪ, ራ, ሬ, ር, ሮ, ሯ, ሰ, ሱ, ሲ, ሳ, ሴ, ስ, ሶ,
                      ሷ, ሸ, ሹ, ሺ, ሻ, ሼ, ሽ, ሾ, ሿ, ቀ, ቁ, ቂ, ቃ,
                      ቄ, ቅ, ቆ, ቋ, በ, ቡ, ቢ, ባ, ቤ, ብ, ቦ, ቧ, ቨ,
                      ቪ, ቫ, ቬ, ቭ, ተ, ቱ, ቲ, ታ, ቴ, ት, ቶ, ቷ, ቸ,
                      ቹ, ቺ, ቻ, ቼ, ች, ቾ, ቿ, ኋ, ነ, ኑ, ኒ, ና, ኔ,
                      ን, ኖ, ኗ, ኘ, ኙ, ኚ, ኛ, ኜ, ኝ, ኞ, አ, ኡ, ኢ,
                      ኤ, እ, ኦ, ኧ, ከ, ኩ, ኪ, ካ, ኬ, ክ, ኮ, ኳ, ኸ,
                      ኻ, ኼ, ኽ, ወ, ዉ, ዊ, ዋ, ዌ, ው, ዎ, ዐ, ዑ, ዓ,
                      ዕ, ዘ, ዙ, ዚ, ዛ, ዜ, ዝ, ዞ, ዟ, ዠ, ዢ, ዣ, ዤ,
                      ዥ, ዦ, የ, ዩ, ዪ, ያ, ዬ, ይ, ዮ, ደ, ዱ, ዲ, ዳ,
                      ዴ, ድ, ዶ, ዷ, ጀ, ጁ, ጂ, ጃ, ጄ, ጅ, ጆ, ጇ, ገ,
                      ጉ, ጊ, ጋ, ጌ, ግ, ጎ, ጓ, ጠ, ጡ, ጢ, ጣ, ጤ, ጥ,
                      ጦ, ጧ, ጨ, ጩ, ጪ, ጫ, ጬ, ጭ, ጮ, ጱ, ጲ, ጳ, ጴ,
                      ጵ, ጸ, ጹ, ጺ, ጻ, ጼ, ጽ, ጾ, ጿ, ፀ, ፁ, ፃ, ፄ,
                      ፅ, ፆ, ፈ, ፉ, ፊ, ፋ, ፌ, ፍ, ፎ, ፏ, ፐ, ፑ, ፒ,
                      ፓ, ፔ, ፕ, ፖ]

model:
  # Training dataset / data-loader settings.
  train_ds:
    # `???` is left as a placeholder here — presumably OmegaConf's
    # "mandatory value" marker, to be supplied by the caller; confirm.
    manifest_filepath: ???
    labels: *labels
    sample_rate: *sample_rate  # same 16000 as the top-level anchor
    batch_size: 12
    shuffle: true
    num_workers: 16
    trim_silence: true
    max_duration: 33.0
    # Tarred-dataset loading is switched off, so no tar paths are given.
    is_tarred: false
    tarred_audio_filepaths: null

  # Validation dataset / data-loader settings; shuffling is disabled here.
  validation_ds:
    # `???` placeholder — presumably must be supplied by the caller; confirm.
    manifest_filepath: ???
    labels: *labels
    sample_rate: *sample_rate  # same 16000 as the top-level anchor
    batch_size: 12
    num_workers: 16
    shuffle: false

  # Audio front end: the class named in `cls`, configured with `params`.
  preprocessor:
    cls: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
    params:
      sample_rate: *sample_rate
      # Anchored as `n_mels`; the encoder's `feat_in` aliases this value.
      features: &n_mels 64
      n_fft: 512
      window: hann
      window_size: 0.02
      window_stride: 0.01
      normalize: per_feature
      frame_splicing: 1
      dither: 0.00001
      stft_conv: false

  # Spectrogram augmentation module and its rectangular-mask parameters.
  spec_augment:
    cls: nemo.collections.asr.modules.SpectrogramAugmentation
    params:
      rect_masks: 5
      rect_freq: 50
      rect_time: 120
  # Convolutional encoder: seven entries under `jasper`, listed in order.
  # Blocks 2-5 take their repeat count from the shared `repeat` anchor;
  # blocks 1, 6 and 7 pin repeat to 1. Blocks 6 and 7 also pin dropout to 0.0
  # and turn the residual connection off.
  encoder:
    cls: nemo.collections.asr.modules.ConvASREncoder
    params:
      feat_in: *n_mels  # aliases the preprocessor's `features` value
      activation: relu
      conv_mask: true

      jasper:
        # Block 1: 128 filters, kernel 11.
        - filters: 128
          kernel: [11]
          stride: [1]
          dilation: [1]
          repeat: 1
          residual: true
          separable: *separable
          dropout: *dropout
          se: true
          se_context_size: -1

        # Block 2: 256 filters, kernel 13.
        - filters: 256
          kernel: [13]
          stride: [1]
          dilation: [1]
          repeat: *repeat
          residual: true
          separable: *separable
          dropout: *dropout
          se: true
          se_context_size: -1

        # Block 3: 256 filters, kernel 15.
        - filters: 256
          kernel: [15]
          stride: [1]
          dilation: [1]
          repeat: *repeat
          residual: true
          separable: *separable
          dropout: *dropout
          se: true
          se_context_size: -1

        # Block 4: 256 filters, kernel 17.
        - filters: 256
          kernel: [17]
          stride: [1]
          dilation: [1]
          repeat: *repeat
          residual: true
          separable: *separable
          dropout: *dropout
          se: true
          se_context_size: -1

        # Block 5: 256 filters, kernel 19.
        - filters: 256
          kernel: [19]
          stride: [1]
          dilation: [1]
          repeat: *repeat
          residual: true
          separable: *separable
          dropout: *dropout
          se: true
          se_context_size: -1

        # Block 6: 256 filters, kernel 21, no residual, fixed dropout.
        - filters: 256
          kernel: [21]
          stride: [1]
          dilation: [1]
          repeat: 1
          residual: false
          separable: *separable
          dropout: 0.0
          se: true
          se_context_size: -1

        # Block 7: kernel-1 block with 1024 filters. The filter count is
        # anchored as `enc_feat_out`.
        - filters: &enc_feat_out 1024
          kernel: [1]
          stride: [1]
          dilation: [1]
          repeat: 1
          residual: false
          separable: *separable
          dropout: 0.0
          se: true
          se_context_size: -1

  # Decoder head that maps encoder features to the `labels` vocabulary.
  decoder:
    cls: nemo.collections.asr.modules.ConvASRDecoder
    params:
      # Alias the last encoder block's filter count (anchor `enc_feat_out`,
      # currently 1024) instead of repeating the literal, so the decoder
      # input width always matches the encoder output width.
      feat_in: *enc_feat_out
      # Must equal the number of entries in `labels` (235). NeMo's
      # ConvASRDecoder rejects a `vocabulary` whose length differs from
      # `num_classes`; the previous value of 28 did not match this
      # vocabulary. Update this number whenever `labels` changes.
      num_classes: 235
      vocabulary: *labels

  # Optimizer selection plus its learning-rate schedule.
  optim:
    name: novograd
    # cls: nemo.core.optim.optimizers.Novograd
    lr: 0.00005
    # Optimizer arguments.
    betas: [0.8, 0.5]
    weight_decay: 0.001

    # Learning-rate scheduler.
    sched:
      name: CosineAnnealing

      # Scheduler parameters; both warmup settings are left unset.
      min_lr: 0.0
      last_epoch: -1
      warmup_steps: null
      warmup_ratio: null

      # PyTorch Lightning arguments.
      monitor: val_loss
      reduce_on_plateau: false

# Trainer settings. Checkpointing and logging are switched off here because
# exp_manager provides them.
trainer:
  gpus: 0  # number of GPUs
  num_nodes: 1
  distributed_backend: ddp
  max_epochs: 5
  max_steps: null  # computed at runtime if not set
  accumulate_grad_batches: 1
  val_check_interval: 1.0  # 1.0 = once per epoch; 0.25 = four times per epoch
  row_log_interval: 100  # interval of logging
  checkpoint_callback: false  # provided by exp_manager
  logger: false  # provided by exp_manager

# Experiment manager: named after the model (via the `name` anchor), with the
# TensorBoard logger and checkpoint callback both enabled.
exp_manager:
  name: *name
  exp_dir: null  # no experiment directory set here
  create_checkpoint_callback: true
  create_tensorboard_logger: true

# Hydra overrides.
hydra:
  job_logging:
    root:
      # NOTE(review): presumably clears Hydra's root log handlers so they do
      # not duplicate the application's own logging — confirm.
      handlers: null
  run:
    # NOTE(review): presumably keeps the run in the current directory instead
    # of a per-run output folder — confirm.
    dir: .