CV collection: image classification #654

Merged Jun 3, 2020 · 34 commits
Commits
44077f9
CV collection init: MNIST image classification
tkornuta-nvidia May 22, 2020
519b663
Ported FFN and TensorReshaper, MNIST classification working on CPU
tkornuta-nvidia May 22, 2020
56fc0b3
reformatted code
tkornuta-nvidia May 22, 2020
6f99dcb
convnet encoder ported, example working... but showing that CNNs are …
tkornuta-nvidia May 22, 2020
9f037f6
format fix
tkornuta-nvidia May 22, 2020
f244a30
Trainable NM fix - removing no_grad()
tkornuta-nvidia May 22, 2020
4fdaa03
CIFAR10 working
tkornuta-nvidia May 22, 2020
8d051ca
Made the types of FFN and ReshapeTensor more
tkornuta-nvidia May 23, 2020
364aef4
formatting fix
tkornuta-nvidia May 23, 2020
eee5915
LGTM unused import fixes
tkornuta-nvidia May 23, 2020
cf237a4
LGTM fixes: unused variable in the loop
tkornuta-nvidia May 23, 2020
b165288
GenericImageEncoder ported + CIFAR10 VGG16 classification example
tkornuta-nvidia May 23, 2020
a58d3dd
Merge branch 'master' of github.com:NVIDIA/NeMo into dev-cv-image-cla…
tkornuta-nvidia May 23, 2020
f0331a9
Added NonLinearity component, simplified the FFN, cifar10 - ResNet50 …
tkornuta-nvidia May 23, 2020
7ab19d7
LGTM fixes
tkornuta-nvidia May 23, 2020
37ed483
Stronger typing in CV modules and examples, introduced several new El…
tkornuta-nvidia May 28, 2020
050b424
formatting
tkornuta-nvidia May 28, 2020
e43d428
Merge branch 'master' of github.com:NVIDIA/NeMo into dev-cv-image-cla…
tkornuta-nvidia May 28, 2020
5566638
updated requirements, docs, setup, added information about CV collect…
tkornuta-nvidia May 28, 2020
af20a4f
updated description in changelog
tkornuta-nvidia May 28, 2020
516642d
minor comment polish
tkornuta-nvidia May 28, 2020
ee6be29
rst fix
tkornuta-nvidia May 28, 2020
be0c6ea
minor nemo typing fix - imagetype
tkornuta-nvidia May 28, 2020
44cfdea
Merge branch 'master' of github.com:NVIDIA/NeMo into dev-cv-image-cla…
tkornuta-nvidia Jun 2, 2020
ee6ec4a
polished datalayers, added CIFAR100, added Index and Label types, pol…
tkornuta-nvidia Jun 2, 2020
3730537
formatting fix
tkornuta-nvidia Jun 2, 2020
456c6d3
GenericImageEncoder -> ImageEncoder, updated readme file
tkornuta-nvidia Jun 2, 2020
d3ab6ff
changed assert to get_value_from_dict
tkornuta-nvidia Jun 2, 2020
2160027
formatting fix
tkornuta-nvidia Jun 2, 2020
8c3a9fd
added python 3 typing to all inits, fixed LGTM issue, formatted
tkornuta-nvidia Jun 2, 2020
2cd0ac5
Updated docstrings
tkornuta-nvidia Jun 2, 2020
bba75f9
raise ConfigurationError
tkornuta-nvidia Jun 2, 2020
6613d40
reshape tensor docstring update
tkornuta-nvidia Jun 2, 2020
eee4c1e
Label -> StringLabel, description of ImageEncoder
tkornuta-nvidia Jun 3, 2020
9 changes: 2 additions & 7 deletions CHANGELOG.md
@@ -79,6 +79,8 @@ To release a new version, please update the changelog as followed:
 - ContextNet Encoder + Decoder Initial Support ([PR #630](https://github.com/NVIDIA/NeMo/pull/630)) - @titu1994
 - Added finetuning with Megatron-LM ([PR #601](https://github.com/NVIDIA/NeMo/pull/601)) - @ekmb
 - Added documentation for 8 kHz model ([PR #632](https://github.com/NVIDIA/NeMo/pull/632)) - @jbalam-nv
+- The Neural Graph is a high-level abstract concept empowering the users to build graphs consisting of many, interconnected Neural Modules. A user in his/her application can build any number of graphs, potentially spanning over the same modules. The import/export options combined with the lightweight API make Neural Graphs a perfect tool for rapid prototyping and experimentation. ([PR #413](https://github.com/NVIDIA/NeMo/pull/413)) - @tkornuta-nvidia
+- Created the NeMo CV collection, added MNIST and CIFAR10 thin datalayers, implemented/ported several general usage trainable and non-trainable modules, added several new ElementTypes ([PR #654](https://github.com/NVIDIA/NeMo/pull/654)) - @tkornuta-nvidia


### Changed
@@ -94,13 +96,6 @@ To release a new version, please update the changelog as followed:
 
 ### Security
 
-### Contributors
-
-## [0.10.2] - 2020-05-05
-
-### Added
-- The Neural Graph is a high-level abstract concept empowering the users to build graphs consisting of many, interconnected Neural Modules. A user in his/her application can build any number of graphs, potentially spanning over the same modules. The import/export options combined with the lightweight API make Neural Graphs a perfect tool for rapid prototyping and experimentation. ([PR #413](https://github.com/NVIDIA/NeMo/pull/413)) - @tkornuta
-
 ## [0.10.0] - 2020-04-03
 
 ### Added
1 change: 1 addition & 0 deletions docs/sources/source/collections/modules.rst
@@ -8,5 +8,6 @@ NeMo Collections API
 
    core
    nemo_asr
+   nemo_cv
    nemo_tts
    nemo_nlp
34 changes: 34 additions & 0 deletions docs/sources/source/collections/nemo_cv.rst
@@ -0,0 +1,34 @@
NeMo CV collection
==================

DataLayers
----------
.. automodule:: nemo.collections.cv.modules.data_layers
    :members:
    :undoc-members:
    :show-inheritance:
    :exclude-members: forward

Trainable Modules
-----------------
.. automodule:: nemo.collections.cv.modules.trainables
    :members:
    :undoc-members:
    :show-inheritance:
    :exclude-members: forward

NonTrainable Modules
--------------------
.. automodule:: nemo.collections.cv.modules.non_trainables
    :members:
    :undoc-members:
    :show-inheritance:
    :exclude-members: forward

Losses
------
.. automodule:: nemo.collections.cv.modules.losses
    :members:
    :undoc-members:
    :show-inheritance:
    :exclude-members: forward
5 changes: 2 additions & 3 deletions nemo/backends/pytorch/nm.py
@@ -156,8 +156,7 @@ def __init__(self, name=None):
     def __call__(self, force_pt=False, *input, **kwargs):
         pt_call = len(input) > 0 or force_pt
         if pt_call:
-            with t.no_grad():
-                return self.forward(*input, **kwargs)
+            return self.forward(*input, **kwargs)
         else:
             return NeuralModule.__call__(self, **kwargs)
 
@@ -305,13 +304,13 @@ def dataset(self):
         pass
 
     @property
-    @abstractmethod
     def data_iterator(self):
         """"Iterator over the dataset. It is a good idea to return
         torch.utils.data.DataLoader here. Should implement either this or
         `dataset`.
         If this is implemented, `dataset` property should return None.
         """
+        return None
 
     @property
     def batch_size(self):
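The first hunk above is the substance of the "Trainable NM fix - removing no_grad()" commit: wrapping `forward()` in `torch.no_grad()` stops autograd from recording the computation graph, so `backward()` can never reach the module's weights and training silently does nothing. A minimal plain-PyTorch sketch (not NeMo code) of the difference:

```python
# Why wrapping forward() in no_grad() breaks training: under no_grad(),
# no autograd graph is built, so the output cannot backpropagate.
import torch

layer = torch.nn.Linear(4, 2)
x = torch.randn(1, 4)

with torch.no_grad():
    out_frozen = layer(x)
print(out_frozen.requires_grad)  # False - no gradient graph was recorded

out = layer(x)                   # same call outside no_grad()
print(out.requires_grad)         # True - backward() can reach the weights
out.sum().backward()
print(layer.weight.grad is not None)  # True
```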
6 changes: 6 additions & 0 deletions nemo/collections/cv/README.md
@@ -0,0 +1,6 @@
NeMo CV Collection: Neural Modules for Computer Vision
====================================================================

The NeMo CV collection offers modules useful for the following computer vision applications:
1. Image Classification
    * classification of MNIST digits using LeNet-5 (a classic "hello world")
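For illustration, a sketch of what that MNIST "hello world" could look like with this collection, mirroring the CIFAR10 examples further down in this PR. Note this is hypothetical: `MNISTDataLayer`, `LeNet5`, and their argument/output names are assumed, since the MNIST example itself is not part of the diff shown here.

```python
# Hypothetical sketch only: MNISTDataLayer and LeNet5 are assumed names,
# patterned on the CIFAR10 examples in this PR.
from nemo.collections.cv.modules.data_layers import MNISTDataLayer  # assumed name
from nemo.collections.cv.modules.losses import NLLLoss
from nemo.collections.cv.modules.trainables import LeNet5  # assumed name
from nemo.core import NeuralGraph, NeuralModuleFactory, OperationMode

nf = NeuralModuleFactory()

mnist_dl = MNISTDataLayer(train=True)
lenet5 = LeNet5()
nll_loss = NLLLoss()

with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
    img, tgt = mnist_dl()          # output arity/names assumed
    pred = lenet5(images=img)      # parameter name assumed
    loss = nll_loss(predictions=pred, targets=tgt)
    training_graph.outputs["loss"] = loss

nf.train(
    training_graph=training_graph,
    optimization_params={"num_epochs": 10, "lr": 0.001},
    optimizer="adam",
)
```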
20 changes: 20 additions & 0 deletions nemo/collections/cv/__init__.py
@@ -0,0 +1,20 @@
# =============================================================================
# Copyright (c) 2020 NVIDIA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

from nemo.collections.cv.modules import *

# __version__ = "0.1"
# __name__ = "nemo.collections.cv"
@@ -0,0 +1,83 @@
# =============================================================================
# Copyright (c) 2020 NVIDIA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import argparse

import nemo.utils.argparse as nm_argparse
from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
from nemo.collections.cv.modules.losses import NLLLoss
from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
from nemo.collections.cv.modules.trainables import ConvNetEncoder, FeedForwardNetwork
from nemo.core import (
    DeviceType,
    NeuralGraph,
    NeuralModuleFactory,
    OperationMode,
    SimpleLossLoggerCallback,
    WandbCallback,
)
from nemo.utils import logging

if __name__ == "__main__":
    # Create the default parser.
    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()], conflict_handler='resolve')
    # Parse the arguments.
    args = parser.parse_args()

    # Instantiate Neural Factory.
    nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)

    # Data layer for training.
    cifar10_dl = CIFAR10DataLayer(train=True)
    # The "model".
    cnn = ConvNetEncoder(input_depth=3, input_height=32, input_width=32)
    reshaper = ReshapeTensor(input_sizes=[-1, 16, 2, 2], output_sizes=[-1, 64])
    ffn = FeedForwardNetwork(input_size=64, output_size=10, dropout_rate=0.1)
    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
    # Loss.
    nll_loss = NLLLoss()

    # Create a training graph.
    with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
        img, tgt = cifar10_dl()
        feat_map = cnn(inputs=img)
        res_img = reshaper(inputs=feat_map)
        logits = ffn(inputs=res_img)
        pred = nl(inputs=logits)
        loss = nll_loss(predictions=pred, targets=tgt)
        # Set output - that output will be used for training.
        training_graph.outputs["loss"] = loss

    # Display the graph summary.
    logging.info(training_graph.summary())

    # SimpleLossLoggerCallback will print loss values to console.
    callback = SimpleLossLoggerCallback(
        tensors=[loss], print_func=lambda x: logging.info(f'Training Loss: {str(x[0].item())}')
    )

    # Log training metrics to W&B.
    wand_callback = WandbCallback(
        train_tensors=[loss], wandb_name="simple-mnist-fft", wandb_project="cv-collection-image-classification",
    )

    # Invoke the "train" action.
    nf.train(
        training_graph=training_graph,
        callbacks=[callback, wand_callback],
        optimization_params={"num_epochs": 10, "lr": 0.001},
        optimizer="adam",
    )
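The reshaper's sizes encode the arithmetic linking the encoder to the FFN: per the script, `ConvNetEncoder` emits a `[-1, 16, 2, 2]` feature map for 32x32 CIFAR10 inputs, and 16 * 2 * 2 = 64 matches `FeedForwardNetwork(input_size=64)`. A quick plain-PyTorch check of that flattening:

```python
# Sanity check of the reshape arithmetic used above: a [N, 16, 2, 2]
# feature map flattens to [N, 64], matching FeedForwardNetwork(input_size=64).
import torch

feat_map = torch.randn(8, 16, 2, 2)      # encoder output shape, per the script
flat = feat_map.reshape(-1, 16 * 2 * 2)  # 16 * 2 * 2 == 64
assert flat.shape == (8, 64)
print(flat.shape)  # torch.Size([8, 64])
```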
@@ -0,0 +1,79 @@
# =============================================================================
# Copyright (c) 2020 NVIDIA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import argparse

import nemo.utils.argparse as nm_argparse
from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
from nemo.collections.cv.modules.losses import NLLLoss
from nemo.collections.cv.modules.non_trainables import NonLinearity
from nemo.collections.cv.modules.trainables import GenericImageEncoder
from nemo.core import (
    DeviceType,
    NeuralGraph,
    NeuralModuleFactory,
    OperationMode,
    SimpleLossLoggerCallback,
    WandbCallback,
)
from nemo.utils import logging

if __name__ == "__main__":
    # Create the default parser.
    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()], conflict_handler='resolve')
    # Parse the arguments.
    args = parser.parse_args()

    # Instantiate Neural Factory.
    nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)

    # Data layer - upscale the CIFAR10 images to ImageNet resolution.
    cifar10_dl = CIFAR10DataLayer(height=224, width=224, train=True)
    # The "model".
    image_classifier = GenericImageEncoder(model_type="resnet50", output_size=10, pretrained=True, name="resnet50")
    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
    # Loss.
    nll_loss = NLLLoss()

    # Create a training graph.
    with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
        img, tgt = cifar10_dl()
        logits = image_classifier(inputs=img)
        pred = nl(inputs=logits)
        loss = nll_loss(predictions=pred, targets=tgt)
        # Set output - that output will be used for training.
        training_graph.outputs["loss"] = loss

    # Display the graph summary.
    logging.info(training_graph.summary())

    # SimpleLossLoggerCallback will print loss values to console.
    callback = SimpleLossLoggerCallback(
        tensors=[loss], print_func=lambda x: logging.info(f'Training Loss: {str(x[0].item())}')
    )

    # Log training metrics to W&B.
    wand_callback = WandbCallback(
        train_tensors=[loss], wandb_name="simple-mnist-fft", wandb_project="cv-collection-image-classification",
    )

    # Invoke the "train" action.
    nf.train(
        training_graph=training_graph,
        callbacks=[callback, wand_callback],
        optimization_params={"num_epochs": 10, "lr": 0.001},
        optimizer="adam",
    )
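`GenericImageEncoder(model_type="resnet50", output_size=10, pretrained=True)` presumably wraps torchvision's pretrained ResNet-50 and swaps its 1000-way ImageNet head for a 10-way one. A rough plain-torchvision approximation of that configuration (an assumption about the module's behavior, not its actual implementation):

```python
# Rough torchvision equivalent of the encoder configuration above:
# pretrained ResNet-50 with its classification head replaced by a
# 10-way linear layer (downloads ImageNet weights on first use).
import torch
import torchvision

model = torchvision.models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 10)  # 2048 -> 10 classes

dummy = torch.randn(2, 3, 224, 224)  # CIFAR10 upscaled to ImageNet resolution
print(model(dummy).shape)            # torch.Size([2, 10])
```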
@@ -0,0 +1,84 @@
# =============================================================================
# Copyright (c) 2020 NVIDIA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import argparse

import nemo.utils.argparse as nm_argparse
from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
from nemo.collections.cv.modules.losses import NLLLoss
from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
from nemo.collections.cv.modules.trainables import FeedForwardNetwork, GenericImageEncoder
from nemo.core import (
    DeviceType,
    NeuralGraph,
    NeuralModuleFactory,
    OperationMode,
    SimpleLossLoggerCallback,
    WandbCallback,
)
from nemo.utils import logging

if __name__ == "__main__":
    # Create the default parser.
    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()], conflict_handler='resolve')
    # Parse the arguments.
    args = parser.parse_args()

    # Instantiate Neural Factory.
    nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)

    # Data layer - upscale the CIFAR10 images to ImageNet resolution.
    cifar10_dl = CIFAR10DataLayer(height=224, width=224, train=True)
    # The "model".
    image_encoder = GenericImageEncoder(model_type="vgg16", return_feature_maps=True, pretrained=True, name="vgg16")
    reshaper = ReshapeTensor(input_sizes=[-1, 7, 7, 512], output_sizes=[-1, 25088])
    ffn = FeedForwardNetwork(input_size=25088, output_size=10, hidden_sizes=[1000, 1000], dropout_rate=0.1)
    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
    # Loss.
    nll_loss = NLLLoss()

    # Create a training graph.
    with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
        img, tgt = cifar10_dl()
        feat_map = image_encoder(inputs=img)
        res_img = reshaper(inputs=feat_map)
        logits = ffn(inputs=res_img)
        pred = nl(inputs=logits)
        loss = nll_loss(predictions=pred, targets=tgt)
        # Set output - that output will be used for training.
        training_graph.outputs["loss"] = loss

    # Freeze the pretrained encoder.
    training_graph.freeze(["vgg16"])
    logging.info(training_graph.summary())

    # SimpleLossLoggerCallback will print loss values to console.
    callback = SimpleLossLoggerCallback(
        tensors=[loss], print_func=lambda x: logging.info(f'Training Loss: {str(x[0].item())}')
    )

    # Log training metrics to W&B.
    wand_callback = WandbCallback(
        train_tensors=[loss], wandb_name="simple-mnist-fft", wandb_project="cv-collection-image-classification",
    )

    # Invoke the "train" action.
    nf.train(
        training_graph=training_graph,
        callbacks=[callback, wand_callback],
        optimization_params={"num_epochs": 10, "lr": 0.001},
        optimizer="adam",
    )
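The reshaper sizes in this script follow from VGG16's convolutional stack: at 224x224 input it produces a 512x7x7 feature map, i.e. 512 * 7 * 7 = 25088 features once flattened, matching `FeedForwardNetwork(input_size=25088)`. A quick torchvision check of that shape (assuming the encoder wraps torchvision's VGG16 feature extractor, as `model_type="vgg16"` suggests):

```python
# Check of the VGG16 feature-map size assumed by the reshaper above:
# torchvision's VGG16 convolutional stack maps 224x224 inputs to
# [N, 512, 7, 7], i.e. 25088 features once flattened.
import torch
import torchvision

features = torchvision.models.vgg16(pretrained=False).features
with torch.no_grad():
    out = features(torch.randn(1, 3, 224, 224))
print(out.shape)                # torch.Size([1, 512, 7, 7])
print(out.flatten(1).shape[1])  # 25088, matching FeedForwardNetwork(input_size=25088)
```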