Added NonLinearity component, simplified the FFN, cifar10 - ResNet50 operational

Signed-off-by: Tomasz Kornuta <[email protected]>
NVIDIA · May 23, 2020 · f0331a9 · f0331a9
1 parent a58d3dd
commit f0331a9
Show file tree

Hide file tree

Showing 12 changed files with 275 additions and 106 deletions.
diff --git a/nemo/collections/cv/examples/cifar10_convnet_ffn_image_classification.py b/nemo/collections/cv/examples/cifar10_convnet_ffn_image_classification.py
@@ -17,11 +17,10 @@
 import argparse
 
 import nemo.utils.argparse as nm_argparse
-from nemo.collections.cv.modules.data_layers.cifar10_datalayer import CIFAR10DataLayer
-from nemo.collections.cv.modules.losses.nll_loss import NLLLoss
-from nemo.collections.cv.modules.non_trainables.reshape_tensor import ReshapeTensor
-from nemo.collections.cv.modules.trainables.convnet_encoder import ConvNetEncoder
-from nemo.collections.cv.modules.trainables.feed_forward_network import FeedForwardNetwork
+from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
+from nemo.collections.cv.modules.trainables import ConvNetEncoder, FeedForwardNetwork
 from nemo.core import (
     DeviceType,
     NeuralGraph,
@@ -38,24 +37,26 @@
     # Parse the arguments
     args = parser.parse_args()
 
-    # 0. Instantiate Neural Factory.
+    # Instantiate Neural Factory (CPU placement).
     nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)
 
-    # Data layers for training and validation.
-    dl = CIFAR10DataLayer(train=True)
-    # Model.
+    # Data layer for training.
+    cifar10_dl = CIFAR10DataLayer(train=True)
+    # The "model": ConvNet encoder -> reshape -> FFN (logits) -> log-softmax.
     cnn = ConvNetEncoder(input_depth=3, input_height=32, input_width=32)
     reshaper = ReshapeTensor(input_sizes=[-1, 16, 2, 2], output_sizes=[-1, 64])
-    ffn = FeedForwardNetwork(input_size=64, output_size=10, dropout_rate=0.1, final_logsoftmax=True)
+    ffn = FeedForwardNetwork(input_size=64, output_size=10, dropout_rate=0.1)
+    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
     # Loss.
     nll_loss = NLLLoss()
 
-    # 2. Create a training graph.
+    # Create a training graph: data layer -> model -> NLL loss.
     with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
-        img, tgt = dl()
+        img, tgt = cifar10_dl()
         feat_map = cnn(inputs=img)
         res_img = reshaper(inputs=feat_map)
-        pred = ffn(inputs=res_img)
+        logits = ffn(inputs=res_img)
+        pred = nl(inputs=logits)
         loss = nll_loss(predictions=pred, targets=tgt)
         # Set output - that output will be used for training.
         training_graph.outputs["loss"] = loss

diff --git a/nemo/collections/cv/examples/cifar10_resnet50_image_classification.py b/nemo/collections/cv/examples/cifar10_resnet50_image_classification.py
@@ -0,0 +1,79 @@
+# =============================================================================
+# Copyright (c) 2020 NVIDIA. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import argparse
+
+import nemo.utils.argparse as nm_argparse
+from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
+from nemo.collections.cv.modules.trainables import FeedForwardNetwork, GenericImageEncoder
+from nemo.core import (
+    DeviceType,
+    NeuralGraph,
+    NeuralModuleFactory,
+    OperationMode,
+    SimpleLossLoggerCallback,
+    WandbCallback,
+)
+from nemo.utils import logging
+
+if __name__ == "__main__":
+    # Create the default parser.
+    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()], conflict_handler='resolve')
+    # Parse the arguments
+    args = parser.parse_args()
+
+    # Instantiate Neural Factory (CPU placement).
+    nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)
+
+    # Data layer - upscale the CIFAR10 images to ImageNet resolution.
+    cifar10_dl = CIFAR10DataLayer(height=224, width=224, train=True)
+    # The "model": pretrained ResNet50 with a 10-way output, followed by log-softmax.
+    image_classifier = GenericImageEncoder(model_type="resnet50", output_size=10, pretrained=True, name="resnet50")
+    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
+    # Loss.
+    nll_loss = NLLLoss()
+
+    # Create a training graph: data layer -> classifier (logits) -> log-softmax -> NLL loss.
+    with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
+        img, tgt = cifar10_dl()
+        logits = image_classifier(inputs=img)
+        pred = nl(inputs=logits)
+        loss = nll_loss(predictions=pred, targets=tgt)
+        # Set output - that output will be used for training.
+        training_graph.outputs["loss"] = loss
+
+    # Show info.
+    logging.info(training_graph.summary())
+
+    # SimpleLossLoggerCallback will print loss values to console.
+    callback = SimpleLossLoggerCallback(
+        tensors=[loss], print_func=lambda x: logging.info(f'Training Loss: {x[0].item()}')
+    )
+
+    # Log training metrics to W&B; the run name identifies this example (CIFAR10 + ResNet50).
+    wandb_callback = WandbCallback(
+        train_tensors=[loss], wandb_name="cifar10-resnet50", wandb_project="cv-collection-image-classification",
+    )
+
+    # Invoke the "train" action.
+    nf.train(
+        training_graph=training_graph,
+        callbacks=[callback, wandb_callback],
+        optimization_params={"num_epochs": 10, "lr": 0.001},
+        optimizer="adam",
+    )
diff --git a/nemo/collections/cv/examples/cifar10_vgg16_ffn_image_classification.py b/nemo/collections/cv/examples/cifar10_vgg16_ffn_image_classification.py
@@ -17,11 +17,10 @@
 import argparse
 
 import nemo.utils.argparse as nm_argparse
-from nemo.collections.cv.modules.data_layers.cifar10_datalayer import CIFAR10DataLayer
-from nemo.collections.cv.modules.losses.nll_loss import NLLLoss
-from nemo.collections.cv.modules.non_trainables.reshape_tensor import ReshapeTensor
-from nemo.collections.cv.modules.trainables.generic_image_encoder import GenericImageEncoder
-from nemo.collections.cv.modules.trainables.feed_forward_network import FeedForwardNetwork
+from nemo.collections.cv.modules.data_layers import CIFAR10DataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
+from nemo.collections.cv.modules.trainables import FeedForwardNetwork, GenericImageEncoder
 from nemo.core import (
     DeviceType,
     NeuralGraph,
@@ -38,30 +37,32 @@
     # Parse the arguments
     args = parser.parse_args()
 
-    # 0. Instantiate Neural Factory.
+    # Instantiate Neural Factory (CPU placement).
     nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)
 
-    # Data layers for training and validation - upscale the CIFAR10 images to ImageNet resolution.
-    dl = CIFAR10DataLayer(height=224, width=224, train=True)
-    # Model.
+    # Data layer - upscale the CIFAR10 images to ImageNet resolution.
+    cifar10_dl = CIFAR10DataLayer(height=224, width=224, train=True)
+    # The "model": pretrained VGG16 feature maps -> reshape -> FFN (logits) -> log-softmax.
     image_encoder = GenericImageEncoder(model_type="vgg16", return_feature_maps=True, pretrained=True, name="vgg16")
     reshaper = ReshapeTensor(input_sizes=[-1, 7, 7, 512], output_sizes=[-1, 25088])
-    ffn = FeedForwardNetwork(input_size=25088, output_size=10, hidden_sizes=[1000, 1000], dropout_rate=0.1, final_logsoftmax=True)
+    ffn = FeedForwardNetwork(input_size=25088, output_size=10, hidden_sizes=[1000, 1000], dropout_rate=0.1)
+    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
     # Loss.
     nll_loss = NLLLoss()
 
-    # 2. Create a training graph.
+    # Create a training graph: data layer -> model -> NLL loss.
     with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
-        img, tgt = dl()
+        img, tgt = cifar10_dl()
         feat_map = image_encoder(inputs=img)
         res_img = reshaper(inputs=feat_map)
-        pred = ffn(inputs=res_img)
+        logits = ffn(inputs=res_img)
+        pred = nl(inputs=logits)
         loss = nll_loss(predictions=pred, targets=tgt)
         # Set output - that output will be used for training.
         training_graph.outputs["loss"] = loss
 
     # Freeze the pretrained encoder.
-    training_graph.freeze()
+    training_graph.freeze(["vgg16"])  # "vgg16" is the name= given to the image encoder above.
     logging.info(training_graph.summary())
 
     # SimpleLossLoggerCallback will print loss values to console.

diff --git a/nemo/collections/cv/examples/mnist_convnet_ffn_image_classification.py b/nemo/collections/cv/examples/mnist_convnet_ffn_image_classification.py
@@ -17,11 +17,10 @@
 import argparse
 
 import nemo.utils.argparse as nm_argparse
-from nemo.collections.cv.modules.data_layers.mnist_datalayer import MNISTDataLayer
-from nemo.collections.cv.modules.losses.nll_loss import NLLLoss
-from nemo.collections.cv.modules.non_trainables.reshape_tensor import ReshapeTensor
-from nemo.collections.cv.modules.trainables.convnet_encoder import ConvNetEncoder
-from nemo.collections.cv.modules.trainables.feed_forward_network import FeedForwardNetwork
+from nemo.collections.cv.modules.data_layers import MNISTDataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
+from nemo.collections.cv.modules.trainables import ConvNetEncoder, FeedForwardNetwork
 from nemo.core import (
     DeviceType,
     NeuralGraph,
@@ -43,19 +42,21 @@
 
     # Data layers for training and validation.
     dl = MNISTDataLayer(height=28, width=28, train=True)
-    # Model.
+    # The "model": ConvNet encoder -> reshape -> FFN (logits) -> log-softmax.
     cnn = ConvNetEncoder(input_depth=1, input_height=28, input_width=28)
     reshaper = ReshapeTensor(input_sizes=[-1, 16, 1, 1], output_sizes=[-1, 16])
-    ffn = FeedForwardNetwork(input_size=16, output_size=10, dropout_rate=0.1, final_logsoftmax=True)
+    ffn = FeedForwardNetwork(input_size=16, output_size=10, dropout_rate=0.1)
+    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
     # Loss.
     nll_loss = NLLLoss()
 
-    # 2. Create a training graph.
+    # Create a training graph: data layer -> model -> NLL loss.
     with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
         img, tgt = dl()
         feat_map = cnn(inputs=img)
         res_img = reshaper(inputs=feat_map)
-        pred = ffn(inputs=res_img)
+        logits = ffn(inputs=res_img)
+        pred = nl(inputs=logits)
         loss = nll_loss(predictions=pred, targets=tgt)
         # Set output - that output will be used for training.
         training_graph.outputs["loss"] = loss

diff --git a/nemo/collections/cv/examples/mnist_ffn_image_classification.py b/nemo/collections/cv/examples/mnist_ffn_image_classification.py
@@ -16,13 +16,11 @@
 
 import argparse
 
-from torch import max, mean, stack, tensor
-
 import nemo.utils.argparse as nm_argparse
-from nemo.collections.cv.modules.data_layers.mnist_datalayer import MNISTDataLayer
-from nemo.collections.cv.modules.losses.nll_loss import NLLLoss
-from nemo.collections.cv.modules.non_trainables.reshape_tensor import ReshapeTensor
-from nemo.collections.cv.modules.trainables.feed_forward_network import FeedForwardNetwork
+from nemo.collections.cv.modules.data_layers import MNISTDataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.non_trainables import NonLinearity, ReshapeTensor
+from nemo.collections.cv.modules.trainables import FeedForwardNetwork
 from nemo.core import (
     DeviceType,
     NeuralGraph,
@@ -39,25 +37,25 @@
     # Parse the arguments
     args = parser.parse_args()
 
-    # 0. Instantiate Neural Factory.
+    # Instantiate Neural Factory (CPU placement).
     nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.CPU)
 
     # Data layers for training and validation.
     dl = MNISTDataLayer(height=28, width=28, train=True)
-    # Model.
+    # The "model": reshape 1x28x28 images to 784 features -> FFN (logits) -> log-softmax.
-    reshaper = ReshapeTensor(input_sizes=[-1, 1, 32, 32], output_sizes=[-1, 784])
+    reshaper = ReshapeTensor(input_sizes=[-1, 1, 28, 28], output_sizes=[-1, 784])
-    ffn = FeedForwardNetwork(
-        input_size=784, output_size=10, hidden_sizes=[100, 100], dropout_rate=0.1, final_logsoftmax=True
-    )
+    ffn = FeedForwardNetwork(input_size=784, output_size=10, hidden_sizes=[100, 100], dropout_rate=0.1)
+    nl = NonLinearity(type="logsoftmax", sizes=[-1, 10])
     # Loss.
     nll_loss = NLLLoss()
 
-    # 2. Create a training graph.
+    # Create a training graph: data layer -> model -> NLL loss.
     with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
-        img, tgt = dl()
-        res_img = reshaper(inputs=img)
-        pred = ffn(inputs=res_img)
-        loss = nll_loss(predictions=pred, targets=tgt)
+        imgs, tgts = dl()
+        res_imgs = reshaper(inputs=imgs)
+        logits = ffn(inputs=res_imgs)
+        preds = nl(inputs=logits)
+        loss = nll_loss(predictions=preds, targets=tgts)
         # Set output - that output will be used for training.
         training_graph.outputs["loss"] = loss
 

diff --git a/nemo/collections/cv/examples/mnist_lenet5_image_classification.py b/nemo/collections/cv/examples/mnist_lenet5_image_classification.py
@@ -19,9 +19,9 @@
 from torch import max, mean, stack, tensor
 
 import nemo.utils.argparse as nm_argparse
-from nemo.collections.cv.modules.data_layers.mnist_datalayer import MNISTDataLayer
-from nemo.collections.cv.modules.losses.nll_loss import NLLLoss
-from nemo.collections.cv.modules.trainables.lenet5 import LeNet5
+from nemo.collections.cv.modules.data_layers import MNISTDataLayer
+from nemo.collections.cv.modules.losses import NLLLoss
+from nemo.collections.cv.modules.trainables import LeNet5
 from nemo.core import (
     DeviceType,
     EvaluatorCallback,
@@ -38,32 +38,32 @@
     # Parse the arguments
     args = parser.parse_args()
 
-    # 0. Instantiate Neural Factory.
+    # Instantiate Neural Factory (GPU placement).
     nf = NeuralModuleFactory(local_rank=args.local_rank, placement=DeviceType.GPU)
 
     # Data layers for training and validation.
     dl = MNISTDataLayer(height=32, width=32, train=True)
     dl_e = MNISTDataLayer(height=32, width=32, train=False)
-    # Model.
+    # The "model" - a single LeNet5 instance, used by both graphs below.
     lenet5 = LeNet5()
     # Loss.
     nll_loss = NLLLoss()
 
-    # 2. Create a training graph.
+    # Create a training graph: training data layer -> LeNet5 -> NLL loss.
     with NeuralGraph(operation_mode=OperationMode.training) as training_graph:
         x, y = dl()
         p = lenet5(images=x)
         loss = nll_loss(predictions=p, targets=y)
         # Set output - that output will be used for training.
         training_graph.outputs["loss"] = loss
 
-    # 3. Create a validation graph, starting from the second data layer.
+    # Create a validation graph, starting from the second data layer (dl_e, train=False).
     with NeuralGraph(operation_mode=OperationMode.evaluation) as evaluation_graph:
         x, y = dl_e()
         p = lenet5(images=x)
         loss_e = nll_loss(predictions=p, targets=y)
 
-    # 4. Create the callbacks.
+    # Create the callbacks.
     def eval_loss_per_batch_callback(tensors, global_vars):
         if "eval_loss" not in global_vars.keys():
             global_vars["eval_loss"] = []

diff --git a/nemo/collections/cv/modules/non_trainables/__init__.py b/nemo/collections/cv/modules/non_trainables/__init__.py
@@ -14,4 +14,5 @@
 # limitations under the License.
 # =============================================================================
 
+from nemo.collections.cv.modules.non_trainables.non_linearity import *
 from nemo.collections.cv.modules.non_trainables.reshape_tensor import *