typedb · jmsfltchr · Oct 2, 2019 · Sep 30, 2019 · Sep 30, 2019 · Oct 2, 2019
@@ -56,6 +56,7 @@ assemble_pip(
         'pyparsing==2.4.2',
         'python-dateutil==2.8.0',
         'pytz==2019.2',
+        'scipy==1.3.1',
         'semantic-version==2.8.2',
         'six==1.12.0',
         'tensorboard==1.14.0',

@@ -29,10 +29,10 @@
 from kglib.utils.graph.iterate import multidigraph_data_iterator
 
 
-def diagnosis_example(num_graphs=60,
+def diagnosis_example(num_graphs=200,
                       num_processing_steps_tr=10,
                       num_processing_steps_ge=10,
-                      num_training_iterations=2000,
+                      num_training_iterations=1000,
                       keyspace="diagnosis", uri="localhost:48555"):
 
     tr_ge_split = int(num_graphs*0.5)
@@ -230,7 +230,7 @@ def write_predictions_to_grakn(graphs, tx):
                              f'insert'
                              f'$pd(predicted-patient: $p, predicted-diagnosed-disease: $d) isa predicted-diagnosis,'
                              f'has probability-exists {p[2]:.3f},'
-                             f'has probability-non-exists {p[1]:.3f},'
+                             f'has probability-non-exists {p[1]:.3f},'  
                              f'has probability-preexists {p[0]:.3f};')
                     tx.query(query)
     tx.commit()

@@ -66,6 +66,9 @@ py_library(
         requirement('termcolor'),
         # requirement('wrapt'),
 
+        # Scipy deps
+        requirement('scipy')
+
     ],
     visibility=['//visibility:public']
 )
@@ -22,8 +22,8 @@
 import tensorflow as tf
 
 from kglib.kgcn.learn.feed import create_placeholders, create_feed_dict, make_all_runnable_in_session
-from kglib.kgcn.learn.loss import loss_ops_from_difference
-from kglib.kgcn.learn.metrics import compute_accuracy
+from kglib.kgcn.learn.loss import loss_ops_preexisting_no_penalty
+from kglib.kgcn.learn.metrics import existence_accuracy
 
 
 class KGCNLearner:
@@ -40,9 +40,9 @@ def __call__(self,
                  tr_target_graphs,
                  ge_input_graphs,
                  ge_target_graphs,
-                 num_training_iterations=10000,
+                 num_training_iterations=1000,
                  learning_rate=1e-3,
-                 log_every_epochs=50,
+                 log_every_epochs=20,
                  log_dir=None):
         """
         Args:
@@ -67,11 +67,11 @@ def __call__(self,
         output_ops_ge = self._model(input_ph, self._num_processing_steps_ge)
 
         # Training loss.
-        loss_ops_tr = loss_ops_from_difference(target_ph, output_ops_tr)
+        loss_ops_tr = loss_ops_preexisting_no_penalty(target_ph, output_ops_tr)
         # Loss across processing steps.
         loss_op_tr = sum(loss_ops_tr) / self._num_processing_steps_tr
         # Test/generalization loss.
-        loss_ops_ge = loss_ops_from_difference(target_ph, output_ops_ge)
+        loss_ops_ge = loss_ops_preexisting_no_penalty(target_ph, output_ops_ge)
         loss_op_ge = loss_ops_ge[-1]  # Loss from final processing step.
 
         # Optimizer
@@ -123,10 +123,10 @@ def __call__(self,
                         "outputs": output_ops_ge
                     },
                     feed_dict=feed_dict)
-                correct_tr, solved_tr = compute_accuracy(
-                    train_values["target"], train_values["outputs"][-1], use_edges=True)
-                correct_ge, solved_ge = compute_accuracy(
-                    test_values["target"], test_values["outputs"][-1], use_edges=True)
+                correct_tr, solved_tr = existence_accuracy(
+                    train_values["target"], train_values["outputs"][-1], use_edges=False)
+                correct_ge, solved_ge = existence_accuracy(
+                    test_values["target"], test_values["outputs"][-1], use_edges=False)
 
                 elapsed = time.time() - start_time
                 losses_tr.append(train_values["loss"])

@@ -29,14 +29,23 @@
 
 class ITKGCNLearner(unittest.TestCase):
     def test_learner_runs(self):
-        graph = nx.MultiDiGraph()
+        input_graph = nx.MultiDiGraph()
         # TODO Remove 'input' and 'solution' fields, only needed for plotting which should be separated
-        graph.add_node(0, type='person', features=np.array([0, 1, 2], dtype=np.float32), input=1, solution=0)
-        graph.add_edge(1, 0, type='employee', features=np.array([0, 1, 2], dtype=np.float32), input=1, solution=0)
-        graph.add_node(1, type='employment', features=np.array([0, 1, 2], dtype=np.float32), input=1, solution=0)
-        graph.add_edge(1, 2, type='employer', features=np.array([0, 1, 2], dtype=np.float32), input=1, solution=0)
-        graph.add_node(2, type='company', features=np.array([0, 1, 2], dtype=np.float32), input=1, solution=0)
-        graph.graph['features'] = np.zeros(5, dtype=np.float32)
+        input_graph.add_node(0, type='person', features=np.array([0, 1, 2], dtype=np.float32))
+        input_graph.add_edge(1, 0, type='employee', features=np.array([0, 1, 2], dtype=np.float32))
+        input_graph.add_node(1, type='employment', features=np.array([0, 1, 2], dtype=np.float32))
+        input_graph.add_edge(1, 2, type='employer', features=np.array([0, 1, 2], dtype=np.float32))
+        input_graph.add_node(2, type='company', features=np.array([0, 1, 2], dtype=np.float32))
+        input_graph.graph['features'] = np.zeros(5, dtype=np.float32)
+
+        target_graph = nx.MultiDiGraph()
+        # TODO Remove 'input' and 'solution' fields, only needed for plotting which should be separated
+        target_graph.add_node(0, type='person', features=np.array([0, 1, 0], dtype=np.float32))
+        target_graph.add_edge(1, 0, type='employee', features=np.array([0, 0, 1], dtype=np.float32))
+        target_graph.add_node(1, type='employment', features=np.array([0, 0, 1], dtype=np.float32))
+        target_graph.add_edge(1, 2, type='employer', features=np.array([0, 0, 1], dtype=np.float32))
+        target_graph.add_node(2, type='company', features=np.array([0, 1, 0], dtype=np.float32))
+        target_graph.graph['features'] = np.zeros(5, dtype=np.float32)
 
         attr_embedding_dim = 6
         attr_embedders = {lambda: BlankAttribute(attr_embedding_dim): [0, 1, 2]}
@@ -45,7 +54,7 @@ def test_learner_runs(self):
 
         learner = KGCNLearner(kgcn, num_processing_steps_tr=2, num_processing_steps_ge=2)
 
-        learner([graph], [graph], [graph], [graph], num_training_iterations=50)
+        learner([input_graph], [target_graph], [input_graph], [target_graph], num_training_iterations=50)
 
 
 if __name__ == "__main__":

@@ -32,8 +32,7 @@ def loss_ops_from_difference(target_op, output_ops):
 
     """
     loss_ops = [
-        tf.losses.softmax_cross_entropy(target_op.nodes, output_op.nodes) +
-        tf.losses.softmax_cross_entropy(target_op.edges, output_op.edges)
+        tf.losses.softmax_cross_entropy(target_op.nodes, output_op.nodes)
         for output_op in output_ops
     ]
     return loss_ops
@@ -53,18 +52,12 @@ def loss_ops_preexisting_no_penalty(target_op, output_ops):
     """
     loss_ops = []
     for output_op in output_ops:
-        node_mask_op = tf.math.reduce_any(tf.math.not_equal(target_op.nodes, tf.constant(np.array([0., 0., 1.]))),
-                                          axis=1)
+        node_mask_op = tf.math.reduce_any(
+            tf.math.not_equal(target_op.nodes, tf.constant(np.array([1., 0., 0.]), dtype=tf.float32)), axis=1)
         target_nodes = tf.boolean_mask(target_op.nodes, node_mask_op)
         output_nodes = tf.boolean_mask(output_op.nodes, node_mask_op)
 
-        edge_mask_op = tf.math.reduce_any(tf.math.not_equal(target_op.nodes, tf.constant(np.array([0., 0., 1.]))),
-                                          axis=1)
-        target_edges = tf.boolean_mask(target_op.nodes, edge_mask_op)
-        output_edges = tf.boolean_mask(output_op.nodes, edge_mask_op)
-
-        loss_op = (tf.losses.softmax_cross_entropy(target_nodes, output_nodes)
-                   + tf.losses.softmax_cross_entropy(target_edges, output_edges))
+        loss_op = tf.losses.softmax_cross_entropy(target_nodes, output_nodes)
 
         loss_ops.append(loss_op)
 

@@ -20,6 +20,8 @@
 import numpy as np
 from graph_nets import utils_np
 
+from scipy.special import softmax
+
 
 def compute_accuracy(target, output, use_nodes=True, use_edges=True):
     """Calculate model accuracy.
@@ -63,3 +65,38 @@ def compute_accuracy(target, output, use_nodes=True, use_edges=True):
     correct = np.mean(np.concatenate(cs, axis=0))
     solved = np.mean(np.stack(ss))
     return correct, solved
+
+
+def existence_accuracy(target, output, use_nodes=True, use_edges=True):
+    """Calculate accuracy over only the elements that have to be predicted.
+
+    An element (node or edge) is scored only when the first component of its
+    target vector equals 0. For a scored element, the argmax over the remaining
+    components of the target vector is compared with the argmax over the
+    softmax of the remaining components of the output vector.
+
+    Args:
+        target: A `graphs.GraphsTuple` that contains the target graphs.
+        output: A `graphs.GraphsTuple` that contains the output graphs.
+        use_nodes: A `bool` indicator of whether to score nodes or not.
+        use_edges: A `bool` indicator of whether to score edges or not.
+
+    Returns:
+        correct: The fraction of scored elements that were labelled correctly
+            (NaN when no element is scored, being the mean of an empty array).
+        solved: The fraction of graphs in which every scored element was
+            labelled correctly.
+
+    Raises:
+        ValueError: Nodes or edges (or both) must be used
+    """
+    if not use_nodes and not use_edges:
+        raise ValueError("Nodes or edges (or both) must be used")
+    tdds = utils_np.graphs_tuple_to_data_dicts(target)
+    odds = utils_np.graphs_tuple_to_data_dicts(output)
+
+    # Only the requested element kinds are ever indexed, so features that are
+    # not being scored (and may be absent) cannot cause a failure.
+    fields = []
+    if use_nodes:
+        fields.append("nodes")
+    if use_edges:
+        fields.append("edges")
+
+    cs = []
+    ss = []
+    for td, od in zip(tdds, odds):
+        c = []
+        for field in fields:
+            # Elements whose target has 0 in the first component are the ones to predict
+            to_predict = td[field][:, 0] == 0
+            expected = np.argmax(td[field][:, 1:], axis=-1)[to_predict]
+            predicted = np.argmax(softmax(od[field][:, 1:], axis=1), axis=-1)[to_predict]
+            c.append(expected == predicted)
+        c = np.concatenate(c, axis=0)
+        s = np.all(c)
+        cs.append(c)
+        ss.append(s)
+    correct = np.mean(np.concatenate(cs, axis=0))
+    solved = np.mean(np.stack(ss))
+    return correct, solved
@@ -22,7 +22,7 @@
 import numpy as np
 from graph_nets.graphs import GraphsTuple
 
-from kglib.kgcn.learn.metrics import compute_accuracy
+from kglib.kgcn.learn.metrics import compute_accuracy, existence_accuracy
 
 
 class TestComputeAccuracy(unittest.TestCase):
@@ -65,5 +65,45 @@ def test_compute_accuracy_is_as_expected(self):
         self.assertEqual(expected_solved, solved)
 
 
+class TestExistenceAccuracy(unittest.TestCase):
+    """Checks `existence_accuracy` on one hand-built target/output graph pair."""
+
+    def test_compute_accuracy_is_as_expected(self):
+
+        # Node 0 and edge 1 have 1 in the first target component, so they are
+        # not scored. Of the two remaining nodes and one remaining edge, only
+        # node 1 is labelled incorrectly.
+        t_nodes = np.array([[1, 0, 0], [0, 0, 1], [0, 0, 1]], dtype=np.float32)
+        o_nodes = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
+        t_edges = np.array([[0, 1, 0], [1, 0, 0]], dtype=np.float32)
+        o_edges = np.array([[1, 0, 0], [1, 0, 0]], dtype=np.float32)
+
+        # Named to avoid shadowing the `globals` builtin
+        graph_globals = None
+        senders = np.array([0, 1])
+        receivers = np.array([1, 2])
+        n_node = np.array([3])
+        n_edge = np.array([2])
+
+        target = GraphsTuple(nodes=t_nodes,
+                             edges=t_edges,
+                             globals=graph_globals,
+                             receivers=receivers,
+                             senders=senders,
+                             n_node=n_node,
+                             n_edge=n_edge)
+
+        output = GraphsTuple(nodes=o_nodes,
+                             edges=o_edges,
+                             globals=graph_globals,
+                             receivers=receivers,
+                             senders=senders,
+                             n_node=n_node,
+                             n_edge=n_edge)
+
+        correct, solved = existence_accuracy(target, output)
+
+        expected_correct = 2/3
+        expected_solved = 0.0
+
+        # `correct` is a floating point mean, so compare with a tolerance
+        self.assertAlmostEqual(expected_correct, correct)
+        self.assertEqual(expected_solved, solved)
+
+
 if __name__ == "__main__":
     unittest.main()
@@ -29,7 +29,7 @@
 
 
 def softmax(x):
-    return np.exp(x) / sum(np.exp(x))
+    """Return the softmax of `x`, normalised over all of its elements.
+
+    Args:
+        x: Array-like of numeric values.
+
+    Returns:
+        An array of the same shape as `x` whose elements sum to 1.
+    """
+    x = np.asarray(x)
+    # Shifting by the maximum leaves the result mathematically unchanged but
+    # stops np.exp from overflowing to inf (which would make the ratio nan).
+    # Empty input has no maximum, so it is passed through unshifted.
+    exps = np.exp(x - np.max(x)) if x.size else np.exp(x)
+    return exps / np.sum(exps)
 
 
 def make_mlp_model(latent_size=16, num_layers=2):

@@ -67,7 +67,7 @@ def augment_data_fields(graph_data_iterator, fields_to_augment, augmented_field)
     """
 
     for data in graph_data_iterator:
-        data[augmented_field] = np.hstack([np.array(data[field], dtype=float) for field in fields_to_augment])
+        data[augmented_field] = np.hstack([np.array(data[field], dtype=np.float32) for field in fields_to_augment])
 
 
 def encode_solutions(graph, solution_field="solution", encoded_solution_field="encoded_solution",

@@ -23,6 +23,7 @@ protobuf==3.9.1
 pyparsing==2.4.2
 python-dateutil==2.8.0
 pytz==2019.2
+scipy==1.3.1
 semantic-version==2.8.2
 six==1.12.0
 tensorboard==1.14.0

@@ -37,8 +37,8 @@ def tearDown(self):
 
     def test_learning_is_done(self):
         solveds_tr, solveds_ge = diagnosis_example()
-        self.assertGreaterEqual(solveds_tr[-1], 0.1)
-        self.assertGreaterEqual(solveds_tr[-1], 0.1)
+        self.assertGreaterEqual(solveds_tr[-1], 0.7)
+        self.assertGreaterEqual(solveds_ge[-1], 0.7)
 
 
 if __name__ == "__main__":