Adding mxnet tests for hook_from_json (aws#143)

* Adding config files for the reduce and save_all test scripts * Fixing a bug in the mxnet reduction util, which solves issue aws#142
atqy · Aug 21, 2019 · 65fb762 · 65fb762
1 parent 5b84928
commit 65fb762
Show file tree

Hide file tree

Showing 5 changed files with 144 additions and 33 deletions.
diff --git a/tests/mxnet/test_hook_reduce_config.py b/tests/mxnet/test_hook_reduce_config.py
@@ -7,28 +7,32 @@
 
 from datetime import datetime
 
-def test_save_config():
-    reset_collections()
-    global_reduce_config = ReductionConfig(reductions=["max", "mean"])
-    global_save_config = SaveConfig(save_steps=[0,1,2,3])
-
-    tm.get_collection("ReluActivation").include(["relu*"])
-    tm.get_collection("ReluActivation").set_save_config(SaveConfig(save_steps=[4,5,6]))
-    tm.get_collection("ReluActivation").set_reduction_config(ReductionConfig(reductions=["min"], abs_reductions=["max"]))
-
-    tm.get_collection("flatten").include(["flatten*"])
-    tm.get_collection("flatten").set_save_config(SaveConfig(save_steps=[4,5,6]))
-    tm.get_collection("flatten").set_reduction_config(ReductionConfig(norms=["l1"], abs_norms=["l2"]))
-
-    run_id = 'trial_' + datetime.now().strftime('%Y%m%d-%H%M%S%f')
-    out_dir = './newlogsRunTest/' + run_id
-    hook = t_hook(out_dir=out_dir, save_config=global_save_config, include_collections=['weights', 'bias','gradients',
-                                                                               'default', 'ReluActivation', 'flatten'],
-                reduction_config=global_reduce_config)
-    run_mnist_gluon_model(hook=hook, num_steps_train=10, num_steps_eval=10)
+def test_save_config(hook=None, out_dir=None):
+    hook_created = False
+    if hook is None:
+        hook_created = True
+        reset_collections()
+        global_reduce_config = ReductionConfig(reductions=["max", "mean"])
+        global_save_config = SaveConfig(save_steps=[0,1,2,3])
+
+        tm.get_collection("ReluActivation").include(["relu*"])
+        tm.get_collection("ReluActivation").set_save_config(SaveConfig(save_steps=[4,5,6]))
+        tm.get_collection("ReluActivation").set_reduction_config(ReductionConfig(reductions=["min"], abs_reductions=["max"]))
 
+        tm.get_collection("flatten").include(["flatten*"])
+        tm.get_collection("flatten").set_save_config(SaveConfig(save_steps=[4,5,6]))
+        tm.get_collection("flatten").set_reduction_config(ReductionConfig(norms=["l1"], abs_norms=["l2"]))
+
+        run_id = 'trial_' + datetime.now().strftime('%Y%m%d-%H%M%S%f')
+        out_dir = './newlogsRunTest/' + run_id
+        print("Registering the hook with out_dir {0}".format(out_dir))
+        hook = t_hook(out_dir=out_dir, save_config=global_save_config, include_collections=['weights', 'bias','gradients',
+                                                                                   'default', 'ReluActivation', 'flatten'],
+                    reduction_config=global_reduce_config)
+    run_mnist_gluon_model(hook=hook, num_steps_train=10, num_steps_eval=10)
 
     #Testing
+    print("Created the trial with out_dir {0}".format(out_dir))
     tr = create_trial(out_dir)
     assert tr
     assert len(tr.available_steps())==7
@@ -57,5 +61,16 @@ def test_save_config():
     assert l1_norm != None
     l2_norm = flatten_input.reduction_value(step_num=4, abs=True, reduction_name='l2')
     assert l2_norm != None
+    if hook_created:
+        shutil.rmtree(out_dir)
 
-    shutil.rmtree(out_dir)
+def test_save_config_hook_from_json():
+    from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
+    import os
+    out_dir = 'newlogsRunTest2/test_hook_reduce_config_hook_from_json'
+    shutil.rmtree(out_dir, True)
+    os.environ[TORNASOLE_CONFIG_FILE_PATH_ENV_STR] = 'tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json'
+    hook = t_hook.hook_from_config()
+    test_save_config(hook, out_dir)
+    # delete output
+    shutil.rmtree(out_dir, True)
diff --git a/tests/mxnet/test_hook_save_all.py b/tests/mxnet/test_hook_save_all.py
@@ -3,13 +3,42 @@
 from tornasole.mxnet import SaveConfig, reset_collections
 import shutil
 from datetime import datetime
+from tornasole.trials import create_trial
 
-def test_save_all():
-  reset_collections()
-  save_config = SaveConfig(save_steps=[0,1,2,3])
-  run_id = 'trial_' + datetime.now().strftime('%Y%m%d-%H%M%S%f')
-  out_dir = './newlogsRunTest/' + run_id
-  hook = t_hook(out_dir=out_dir, save_config=save_config, save_all=True)
+
+def test_save_all(hook=None, out_dir=None):
+  hook_created = False
+  if hook is None:
+    hook_created = True
+    reset_collections()
+    save_config = SaveConfig(save_steps=[0,1,2,3])
+    run_id = 'trial_' + datetime.now().strftime('%Y%m%d-%H%M%S%f')
+    out_dir = './newlogsRunTest/' + run_id
+    print("Registering the hook with out_dir {}".format(out_dir))
+    hook = t_hook(out_dir=out_dir, save_config=save_config, save_all=True)
   run_mnist_gluon_model(hook=hook, num_steps_train=7, num_steps_eval=5)
-  shutil.rmtree(out_dir)
+  #assert for steps and tensornames
+  print("Created the trial with out_dir {}".format(out_dir))
+  tr = create_trial(out_dir)
+  tensor_list = tr.tensors()
+  assert tr
+  assert len(tr.available_steps()) == 4
+  # some tensor names, like input and output, can't be retrieved from the training session, so here we only assert on the number of tensors
+  # 46 is the tensor count taken from the index file
+  # if there is no assertion failure, then the script was able to save all tensors
+  assert len(tensor_list) == 46
+  if hook_created:
+    shutil.rmtree(out_dir)
+
+def test_save_all_hook_from_json():
+  from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
+  import os
+  reset_collections()
+  out_dir = 'newlogsRunTest2/test_hook_save_all_hook_from_json'
+  shutil.rmtree(out_dir, True)
+  os.environ[TORNASOLE_CONFIG_FILE_PATH_ENV_STR] = 'tests/mxnet/test_json_configs/test_hook_save_all_hook.json'
+  hook = t_hook.hook_from_config()
+  test_save_all(hook, out_dir)
+  # delete output
+  shutil.rmtree(out_dir, True)
 
diff --git a/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json b/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json
@@ -0,0 +1,56 @@
+{
+  "config_name": "my training tornasole config",
+  "s3_path": "s3://kjndjknd_bucket/prefix",
+  "local_path": "newlogsRunTest2/test_hook_reduce_config_hook_from_json",
+  "save_config": "default_save_config",
+  "reduction_config": "default_reduction_config",
+  "include_collections" :  ["weights", "bias","gradients", "default", "ReluActivation", "flatten"],
+  "save_all": false,
+  "collections" : [
+    {
+      "ReluActivation" : {
+        "include_regex": ["relu*"],
+        "save_config": "save_config_collection",
+        "reduction_config": "reduction_config_for_ReluActivation"
+      }
+    },
+    {
+      "flatten": {
+        "include_regex": ["flatten*"],
+        "save_config": "save_config_collection",
+        "reduction_config": "reduction_config_for_flatten"
+      }
+    }
+  ],
+  "save_configs" : [
+    {
+      "save_config_collection" : {
+        "save_steps" : [4,5,6]
+      }
+    },
+    {
+      "default_save_config" : {
+        "save_steps" : [0,1,2,3]
+      }
+    }
+  ],
+  "reduction_configs" : [
+    {
+      "reduction_config_for_ReluActivation" :{
+        "reductions" : ["min"],
+        "abs_reductions" : ["max"]
+      }
+    },
+    {
+      "reduction_config_for_flatten" :{
+        "norms" : ["l1"],
+        "abs_norms" : ["l2"]
+      }
+    },
+    {
+      "default_reduction_config": {
+        "reductions" : ["max", "mean"]
+      }
+    }
+  ]
+}
diff --git a/tests/mxnet/test_json_configs/test_hook_save_all_hook.json b/tests/mxnet/test_json_configs/test_hook_save_all_hook.json
@@ -0,0 +1,14 @@
+{
+  "config_name": "my training tornasole config",
+  "s3_path": "s3://kjndjknd_bucket/prefix",
+  "local_path": "newlogsRunTest2/test_hook_save_all_hook_from_json",
+  "save_config": "default_save_config",
+  "save_all": true,
+  "save_configs" : [
+    {
+      "default_save_config" : {
+        "save_steps" : [0,1,2,3]
+      }
+    }
+  ]
+}
diff --git a/tornasole/mxnet/util.py b/tornasole/mxnet/util.py
@@ -25,11 +25,8 @@ def get_aggregated_data(aggregation_name,
             op = get_numpy_reduction(aggregation_name, numpy_data=tensor_data_np, abs=abs)
         return op
     elif reduction_name in ALLOWED_NORMS:
-        if reduction_name is "l1":
-            op = mx.ndarray.norm(data=tensor_data, ord=1)
-            return op
-        elif reduction_name is "l2":
-            op = mx.ndarray.norm(data=tensor_data, ord=2)
+        if reduction_name in ['l1', 'l2']:
+            op = mx.ndarray.norm(data=tensor_data, ord=int(reduction_name[1]))
             return op
         else:
             raise RuntimeError("Invalid normalization operation {0} for mx.NDArray".format(reduction_name))
@@ -56,4 +53,4 @@ def make_numpy_array(x):
         return np.asarray(x, dtype=x.dtype)
     else:
         raise TypeError('_make_numpy_array only accepts input types of numpy.ndarray, scalar,'
-                        ' and MXNet NDArray, while received type {}'.format(str(type(x))))
+                        ' and MXNet NDArray, while received type {}'.format(str(type(x))))