Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into export_param_fix
Browse files Browse the repository at this point in the history
Signed-off-by: Boris Fomitchev <[email protected]>
  • Loading branch information
borisfom committed Sep 1, 2020
2 parents 7963b4c + 45da106 commit b7e2a47
Show file tree
Hide file tree
Showing 44 changed files with 7,168 additions and 4,606 deletions.
7 changes: 6 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ pipeline {
model.train_ds.batch_size=8 \
model.validation_ds.batch_size=8 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.language_model.pretrained_model_name=bert-base-uncased \
model.dataset.version_2_with_negative=false \
trainer.precision=16 \
Expand All @@ -267,6 +268,7 @@ pipeline {
model.train_ds.batch_size=8 \
model.validation_ds.batch_size=8 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json \
model.language_model.pretrained_model_name=bert-base-uncased \
model.dataset.version_2_with_negative=true \
Expand Down Expand Up @@ -323,6 +325,7 @@ pipeline {
model.validation_ds.batch_size=4 \
trainer.distributed_backend=ddp \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json \
model.language_model.pretrained_model_name=megatron-bert-345m-uncased \
model.dataset.version_2_with_negative=true \
Expand Down Expand Up @@ -356,6 +359,7 @@ pipeline {
model.train_ds.batch_size=8 \
model.validation_ds.batch_size=8 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json \
model.dataset.do_lower_case=false \
model.language_model.pretrained_model_name=roberta-base \
Expand Down Expand Up @@ -449,7 +453,8 @@ pipeline {
model.dataset.data_dir=/home/TestData/nlp/glue_fake/MRPC \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/nlp/glue_benchmark/mrpc'
exp_manager.exp_dir=examples/nlp/glue_benchmark/mrpc \
model.output_dir=examples/nlp/glue_benchmark/mrpc'
sh 'rm -rf examples/nlp/glue_benchmark/mrpc'
}
}
Expand Down
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ To run tutorials:
* - ASR
- Online Noise Augmentation
- `05_Online_Noise_Augmentation.ipynb <https://colab.research.google.com/github/NVIDIA/NeMo/blob/main/tutorials/asr/05_Online_Noise_Augmentation.ipynb>`_
* - NLP
- Token Classification (Named Entity Recognition)
- `Token_Classification_Named_Entity_Recognition_tutorial.ipynb <https://colab.research.google.com/github/NVIDIA/NeMo/blob/main/tutorials/nlp/Token_Classification_Named_Entity_Recognition_tutorial.ipynb>`_
* - NLP
- Punctuation and Capitalization
- `Punctuation_and_Capitalization.ipynb <https://colab.research.google.com/github/NVIDIA/NeMo/blob/main/tutorials/nlp/Punctuation_and_Capitalization.ipynb>`_
Expand Down
33 changes: 12 additions & 21 deletions docs/source/nlp/models.rst
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
Models
======

Currently, NeMo's NLP collection supports the following models:

Supported Tasks and Models:

BERT pretraining
----------------
GLUE Benchmark
--------------
Intent Detection and Slot Filling
---------------------------------
Text Classification
-------------------
Name Entity Recognition (NER)
-----------------------------
Punctuation and Capitalization
------------------------------
Question Answering
------------------

Scripts for running these models, can be found under ``<NeMo_Git_root>/example/nlp/``.
NLP tutorial are located under ``<NeMo_Git_root>/tutorials/nlp/``.
NeMo's NLP collection supports the following models:

* BERT pretraining
* GLUE Benchmark
* Intent Detection and Slot Filling
* Text Classification
* Named Entity Recognition (NER)
* Punctuation and Capitalization
* Question Answering

Scripts for running these models can be found under ``NeMo/examples/nlp/``.
NLP tutorials are located under ``NeMo/tutorials/nlp/``.
62 changes: 33 additions & 29 deletions examples/asr/conf/matchboxnet_3x1x64_v1.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
name: &name "MatchboxNet-3x1x64-v1"
sample_rate: &sample_rate 16000

model:
timesteps: &timesteps 128
repeat: &repeat 1
dropout: &dropout 0.0
kernel_size_factor: &kfactor 1.0
sample_rate: 16000
timesteps: 128
repeat: 1
dropout: 0.0
kernel_size_factor: 1.0

labels: &labels ['bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'four', 'go', 'happy', 'house', 'left', 'marvin',
'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree', 'two', 'up',
'wow', 'yes', 'zero']
labels_full: ['bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'four', 'go', 'happy', 'house', 'left', 'marvin',
'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree', 'two', 'up',
'wow', 'yes', 'zero']

labels_subset: ["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go", "unknown", "silence"]

labels: ${model.labels_full}

train_ds:
manifest_filepath: ???
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: True
augmentor:
Expand All @@ -29,16 +33,16 @@ model:

validation_ds:
manifest_filepath: ???
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: False
val_loss_idx: 0

test_ds:
manifest_filepath: null
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: False
test_loss_idx: 0
Expand Down Expand Up @@ -67,7 +71,7 @@ model:
crop_or_pad_augment:
cls: nemo.collections.asr.modules.CropOrPadSpectrogramAugmentation
params:
audio_length: *timesteps
audio_length: ${model.timesteps}

encoder:
cls: nemo.collections.asr.modules.ConvASREncoder
Expand All @@ -82,57 +86,57 @@ model:
kernel: [11]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: false
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [13]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [15]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [17]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 128
repeat: 1
kernel: [29]
stride: [1]
dilation: [2]
dropout: *dropout
dropout: ${model.dropout}
residual: false
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: &enc_final_filters 128
repeat: 1
kernel: [1]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: false

decoder:
Expand Down
61 changes: 32 additions & 29 deletions examples/asr/conf/matchboxnet_3x1x64_v2.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
name: &name "MatchboxNet-3x1x64-v2"

model:
sample_rate: &sample_rate 16000
sample_rate: 16000
timesteps: 128
repeat: 1
dropout: 0.0
kernel_size_factor: 1.0

timesteps: &timesteps 128
repeat: &repeat 1
dropout: &dropout 0.0
kernel_size_factor: &kfactor 1.0
labels_full: ['visual', 'wow', 'learn', 'backward', 'dog', 'two', 'left', 'happy', 'nine', 'go', 'up', 'bed', 'stop',
'one', 'zero', 'tree', 'seven', 'on', 'four', 'bird', 'right', 'eight', 'no', 'six', 'forward', 'house',
'marvin', 'sheila', 'five', 'off', 'three', 'down', 'cat', 'follow', 'yes']

labels: &labels ['visual', 'wow', 'learn', 'backward', 'dog', 'two', 'left', 'happy', 'nine', 'go', 'up', 'bed', 'stop',
'one', 'zero', 'tree', 'seven', 'on', 'four', 'bird', 'right', 'eight', 'no', 'six', 'forward', 'house',
'marvin', 'sheila', 'five', 'off', 'three', 'down', 'cat', 'follow', 'yes']
labels_subset: ["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go", "unknown", "silence"]

labels: ${model.labels_full}

train_ds:
manifest_filepath: ???
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: True
augmentor:
Expand All @@ -30,16 +33,16 @@ model:

validation_ds:
manifest_filepath: ???
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: False
val_loss_idx: 0

test_ds:
manifest_filepath: null
sample_rate: *sample_rate
labels: *labels
sample_rate: ${model.sample_rate}
labels: ${model.labels}
batch_size: 128
shuffle: False
test_loss_idx: 0
Expand Down Expand Up @@ -68,7 +71,7 @@ model:
crop_or_pad_augment:
cls: nemo.collections.asr.modules.CropOrPadSpectrogramAugmentation
params:
audio_length: *timesteps
audio_length: ${model.timesteps}

encoder:
cls: nemo.collections.asr.modules.ConvASREncoder
Expand All @@ -83,57 +86,57 @@ model:
kernel: [11]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: false
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [13]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [15]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 64
repeat: *repeat
repeat: ${model.repeat}
kernel: [17]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: true
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: 128
repeat: 1
kernel: [29]
stride: [1]
dilation: [2]
dropout: *dropout
dropout: ${model.dropout}
residual: false
separable: true
kernel_size_factor: *kfactor
kernel_size_factor: ${model.kernel_size_factor}

- filters: &enc_final_filters 128
repeat: 1
kernel: [1]
stride: [1]
dilation: [1]
dropout: *dropout
dropout: ${model.dropout}
residual: false

decoder:
Expand Down
2 changes: 1 addition & 1 deletion examples/asr/speech_to_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
--background_data_root=<path where the background data are stored> \
--rebalance_method=<'under', 'over' or 'fixed'> \
--log
(Optional --generate (for demonstration in tutorial). If you want to use your own background noise data, make sure to delete --generates)
(Optional: --demo, for demonstration in the tutorial. If you want to use your own background noise data, make sure to remove --demo.)
```
## Train to convergence
Expand Down
Loading

0 comments on commit b7e2a47

Please sign in to comment.