From 7ea28e4908aae220e8c91cfc1c7ee76f04da61a2 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Wed, 4 Dec 2019 23:24:28 +0000 Subject: [PATCH 1/4] Make the code less generic, more transparent and more readable --- .gitignore | 1 + kglib/kgcn/pipeline/pipeline.py | 66 +++++++++++-------- .../{pipeline_test.py => pipeline_IT.py} | 26 +++++++- 3 files changed, 64 insertions(+), 29 deletions(-) rename kglib/kgcn/pipeline/{pipeline_test.py => pipeline_IT.py} (60%) diff --git a/.gitignore b/.gitignore index c51e2013..75665292 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ __pycache__/ # Data input/output directories dataset/ +kglib/kgcn/examples/diagnosis/events/ diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index 321946c7..bee06931 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -71,7 +71,17 @@ def pipeline(graphs, # Build and run the KGCN ############################################################ - attr_embedders = configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes) + # Create embedders for the different attribute types + attr_embedders = dict() + + if categorical_attributes is not None: + attr_embedders.update(construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes)) + + if continuous_attributes is not None: + attr_embedders.update(construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes)) + + attr_embedders.update(construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, + continuous_attributes)) kgcn = KGCN(len(node_types), len(edge_types), @@ -110,48 +120,49 @@ def pipeline(graphs, return ge_graphs, solveds_tr, solveds_ge -def configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): +def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes): + attr_embedders = dict() - def construct_embedder_funcs(node_types, attribute_config, embedder_func): + # Construct attribute embedders + for attribute_type, categories in categorical_attributes.items(): - attr_embedders = dict() + attr_typ_index = node_types.index(attribute_type) - # Construct attribute embedders - for attribute_type, attribute_props in attribute_config.items(): + def make_embedder(): + return CategoricalAttribute(len(categories), attr_embedding_dim, + name=attribute_type + '_cat_embedder') - attr_typ_index = node_types.index(attribute_type) + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] - # Record the embedder, and the index of the type that it should encode - attr_embedders[embedder_func(attribute_type, attribute_props)] = [attr_typ_index] + return attr_embedders - return attr_embedders +def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes): attr_embedders = dict() - if categorical_attributes is not None: + # Construct attribute embedders + for attribute_type in continuous_attributes.keys(): - def embedder_func(attribute_type, category_values): - def make_embedder(): - return CategoricalAttribute(len(category_values), attr_embedding_dim, - name=attribute_type + '_cat_embedder') - return make_embedder + attr_typ_index = node_types.index(attribute_type) - attr_embedders.update(construct_embedder_funcs(node_types, categorical_attributes, embedder_func)) + def make_embedder(): + return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') - if continuous_attributes is not None: + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] + + return attr_embedders - def embedder_func(attribute_type, _): - def make_embedder(): - return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') - return make_embedder - attr_embedders.update(construct_embedder_funcs(node_types, continuous_attributes, embedder_func)) +def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): - attribute_nodes = [l for el in list(attr_embedders.values()) for l in el] + attribute_names = list(categorical_attributes.keys()) + attribute_names.extend(list(continuous_attributes.keys())) non_attribute_nodes = [] - for i, _ in enumerate(node_types): - if i not in attribute_nodes: + for i, type in enumerate(node_types): + if type not in attribute_names: non_attribute_nodes.append(i) # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does @@ -159,6 +170,9 @@ def make_embedder(): def make_blank_embedder(): return BlankAttribute(attr_embedding_dim) + attr_embedders = dict() + if len(non_attribute_nodes) > 0: attr_embedders[make_blank_embedder] = non_attribute_nodes return attr_embedders + diff --git a/kglib/kgcn/pipeline/pipeline_test.py b/kglib/kgcn/pipeline/pipeline_IT.py similarity index 60% rename from kglib/kgcn/pipeline/pipeline_test.py rename to kglib/kgcn/pipeline/pipeline_IT.py index b9703192..a049b28e 100644 --- a/kglib/kgcn/pipeline/pipeline_test.py +++ b/kglib/kgcn/pipeline/pipeline_IT.py @@ -18,7 +18,22 @@ # import unittest -from kglib.kgcn.pipeline.pipeline import configure_embedders +from kglib.kgcn.pipeline.pipeline import construct_non_attribute_embedders, construct_categorical_embedders, \ + construct_continuous_embedders + + +def construct_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): + attr_embedders = dict() + + if categorical_attributes is not None: + attr_embedders.update(construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes)) + + if continuous_attributes is not None: + attr_embedders.update(construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes)) + + attr_embedders.update(construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, + continuous_attributes)) + return attr_embedders class TestConfigureEmbedders(unittest.TestCase): @@ -28,7 +43,9 @@ def test_all_types_encoded(self): attr_embedding_dim = 5 categorical_attributes = {'a': ['option1', 'option2']} continuous_attributes = {'b': (0, 1)} - attr_embedders = configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes) + + attr_embedders = construct_embedders(node_types, attr_embedding_dim, categorical_attributes, + continuous_attributes) all_types = [l for el in list(attr_embedders.values()) for l in el] expected_types = [0, 1, 2] @@ -40,7 +57,10 @@ def test_multiple_categorical_embedders(self): attr_embedding_dim = 5 categorical_attributes = {'a': ['option1', 'option2'], 'c': ['option3', 'option4']} continuous_attributes = {'b': (0, 1)} - attr_embedders = configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes) + + attr_embedders = construct_embedders(node_types, attr_embedding_dim, categorical_attributes, + continuous_attributes) + all_types = [l for el in list(attr_embedders.values()) for l in el] all_types.sort() From d5c00da1d66a14ea9b6ca6e30ef7a45b4a7fb507 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Wed, 4 Dec 2019 23:47:29 +0000 Subject: [PATCH 2/4] Move embedder construction into a separate file --- kglib/kgcn/pipeline/embed.py | 77 +++++++++++++++++++ .../pipeline/{pipeline_IT.py => embed_IT.py} | 4 +- kglib/kgcn/pipeline/pipeline.py | 61 +-------------- 3 files changed, 81 insertions(+), 61 deletions(-) create mode 100644 kglib/kgcn/pipeline/embed.py rename kglib/kgcn/pipeline/{pipeline_IT.py => embed_IT.py} (95%) diff --git a/kglib/kgcn/pipeline/embed.py b/kglib/kgcn/pipeline/embed.py new file mode 100644 index 00000000..e042e6ca --- /dev/null +++ b/kglib/kgcn/pipeline/embed.py @@ -0,0 +1,77 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from kglib.kgcn.models.attribute import CategoricalAttribute, ContinuousAttribute, BlankAttribute + + +def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes): + attr_embedders = dict() + + # Construct attribute embedders + for attribute_type, categories in categorical_attributes.items(): + + attr_typ_index = node_types.index(attribute_type) + + def make_embedder(): + return CategoricalAttribute(len(categories), attr_embedding_dim, + name=attribute_type + '_cat_embedder') + + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] + + return attr_embedders + + +def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes): + attr_embedders = dict() + + # Construct attribute embedders + for attribute_type in continuous_attributes.keys(): + + attr_typ_index = node_types.index(attribute_type) + + def make_embedder(): + return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') + + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] + + return attr_embedders + + +def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): + + attribute_names = list(categorical_attributes.keys()) + attribute_names.extend(list(continuous_attributes.keys())) + + non_attribute_nodes = [] + for i, type in enumerate(node_types): + if type not in attribute_names: + non_attribute_nodes.append(i) + + # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does + # nothing. This is provided as a list of their indices + def make_blank_embedder(): + return BlankAttribute(attr_embedding_dim) + + attr_embedders = dict() + + if len(non_attribute_nodes) > 0: + attr_embedders[make_blank_embedder] = non_attribute_nodes + return attr_embedders \ No newline at end of file diff --git a/kglib/kgcn/pipeline/pipeline_IT.py b/kglib/kgcn/pipeline/embed_IT.py similarity index 95% rename from kglib/kgcn/pipeline/pipeline_IT.py rename to kglib/kgcn/pipeline/embed_IT.py index a049b28e..d032f26b 100644 --- a/kglib/kgcn/pipeline/pipeline_IT.py +++ b/kglib/kgcn/pipeline/embed_IT.py @@ -18,8 +18,8 @@ # import unittest -from kglib.kgcn.pipeline.pipeline import construct_non_attribute_embedders, construct_categorical_embedders, \ - construct_continuous_embedders +from kglib.kgcn.pipeline.embed import construct_categorical_embedders, construct_continuous_embedders, \ + construct_non_attribute_embedders def construct_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index bee06931..02d30a63 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -22,8 +22,9 @@ from graph_nets.utils_np import graphs_tuple_to_networkxs from kglib.kgcn.learn.learn import KGCNLearner -from kglib.kgcn.models.attribute import ContinuousAttribute, CategoricalAttribute, BlankAttribute from kglib.kgcn.models.core import softmax, KGCN +from kglib.kgcn.pipeline.embed import construct_categorical_embedders, construct_continuous_embedders, \ + construct_non_attribute_embedders from kglib.kgcn.pipeline.encode import encode_types, create_input_graph, create_target_graph, encode_values from kglib.kgcn.pipeline.utils import apply_logits_to_graphs, duplicate_edges_in_reverse from kglib.kgcn.plot.plotting import plot_across_training, plot_predictions @@ -118,61 +119,3 @@ def pipeline(graphs, _, _, _, _, _, solveds_tr, solveds_ge = tr_info return ge_graphs, solveds_tr, solveds_ge - - -def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes): - attr_embedders = dict() - - # Construct attribute embedders - for attribute_type, categories in categorical_attributes.items(): - - attr_typ_index = node_types.index(attribute_type) - - def make_embedder(): - return CategoricalAttribute(len(categories), attr_embedding_dim, - name=attribute_type + '_cat_embedder') - - # Record the embedder, and the index of the type that it should encode - attr_embedders[make_embedder] = [attr_typ_index] - - return attr_embedders - - -def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes): - attr_embedders = dict() - - # Construct attribute embedders - for attribute_type in continuous_attributes.keys(): - - attr_typ_index = node_types.index(attribute_type) - - def make_embedder(): - return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') - - # Record the embedder, and the index of the type that it should encode - attr_embedders[make_embedder] = [attr_typ_index] - - return attr_embedders - - -def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): - - attribute_names = list(categorical_attributes.keys()) - attribute_names.extend(list(continuous_attributes.keys())) - - non_attribute_nodes = [] - for i, type in enumerate(node_types): - if type not in attribute_names: - non_attribute_nodes.append(i) - - # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does - # nothing. This is provided as a list of their indices - def make_blank_embedder(): - return BlankAttribute(attr_embedding_dim) - - attr_embedders = dict() - - if len(non_attribute_nodes) > 0: - attr_embedders[make_blank_embedder] = non_attribute_nodes - return attr_embedders - From e70ce0d5bae4ea92f1571731df7dcbfa65c63bea Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 5 Dec 2019 22:26:28 +0000 Subject: [PATCH 3/4] Refactor embedding of concepts. Make KGCN model properly composed by passing in a ThingEmbedder and a RoleEmbedder. Moves all embedding code into one place --- kglib/kgcn/models/BUILD | 10 ++ kglib/kgcn/models/core.py | 32 ++---- kglib/kgcn/models/core_IT.py | 10 +- kglib/kgcn/models/embedding.py | 103 +++++++++++++++++- .../embed_IT.py => models/embedding_IT.py} | 9 +- kglib/kgcn/models/embedding_test.py | 40 +------ kglib/kgcn/pipeline/embed.py | 77 ------------- kglib/kgcn/pipeline/pipeline.py | 23 +--- 8 files changed, 140 insertions(+), 164 deletions(-) rename kglib/kgcn/{pipeline/embed_IT.py => models/embedding_IT.py} (93%) delete mode 100644 kglib/kgcn/pipeline/embed.py diff --git a/kglib/kgcn/models/BUILD b/kglib/kgcn/models/BUILD index 733287b0..80b78cd7 100644 --- a/kglib/kgcn/models/BUILD +++ b/kglib/kgcn/models/BUILD @@ -33,6 +33,16 @@ py_test( ] ) +py_test( + name = "embedding_IT", + srcs = [ + "embedding_IT.py" + ], + deps = [ + "models" + ] +) + py_test( name = "typewise_test", srcs = [ diff --git a/kglib/kgcn/models/core.py b/kglib/kgcn/models/core.py index aae90e29..41a57a25 100644 --- a/kglib/kgcn/models/core.py +++ b/kglib/kgcn/models/core.py @@ -17,16 +17,12 @@ # under the License. # -from functools import partial - import numpy as np import sonnet as snt from graph_nets import modules from graph_nets import utils_tf from graph_nets.modules import GraphIndependent -from kglib.kgcn.models.embedding import common_embedding, node_embedding - def softmax(x): return np.exp(x) / np.sum(np.exp(x)) @@ -81,11 +77,8 @@ class KGCN(snt.AbstractModule): """ def __init__(self, - num_node_types, - num_edge_types, - type_embedding_dim, - attr_embedding_dim, - attr_embedders, + thing_embedder, + role_embedder, edge_output_size=3, node_output_size=3, latent_size=16, @@ -93,11 +86,9 @@ def __init__(self, name="KGCN"): super(KGCN, self).__init__(name=name) - self._num_node_types = num_node_types - self._num_edge_types = num_edge_types - self._type_embedding_dim = type_embedding_dim - self._attr_embedding_dim = attr_embedding_dim - self._attr_embedders = attr_embedders + self._thing_embedder = thing_embedder + self._role_embedder = role_embedder + self._latent_size = latent_size self._num_layers = num_layers @@ -117,21 +108,12 @@ def __init__(self, self._output_transform = modules.GraphIndependent(edge_fn, node_fn, None) def _edge_model(self): - common_embedding_module = snt.Module( - partial(common_embedding, num_types=self._num_edge_types, - type_embedding_dim=self._type_embedding_dim) - ) - - return snt.Sequential([common_embedding_module, + return snt.Sequential([self._role_embedder, snt.nets.MLP([self._latent_size] * self._num_layers, activate_final=True), snt.LayerNorm()]) def _node_model(self): - node_embedding_module = snt.Module( - partial(node_embedding, num_types=self._num_node_types, type_embedding_dim=self._type_embedding_dim, - attr_encoders=self._attr_embedders, attr_embedding_dim=self._attr_embedding_dim) - ) - return snt.Sequential([node_embedding_module, + return snt.Sequential([self._thing_embedder, snt.nets.MLP([self._latent_size] * self._num_layers, activate_final=True), snt.LayerNorm()]) diff --git a/kglib/kgcn/models/core_IT.py b/kglib/kgcn/models/core_IT.py index 6b7a31cb..f558342c 100644 --- a/kglib/kgcn/models/core_IT.py +++ b/kglib/kgcn/models/core_IT.py @@ -24,6 +24,7 @@ from graph_nets.graphs import GraphsTuple from kglib.kgcn.models.core import KGCN +from kglib.kgcn.models.embedding import ThingEmbedder, RoleEmbedder class ITKGCN(unittest.TestCase): @@ -39,8 +40,13 @@ def test_kgcn_runs(self): n_node=tf.convert_to_tensor(np.array([3], dtype=np.int32)), n_edge=tf.convert_to_tensor(np.array([2], dtype=np.int32))) - attr_embedders = {lambda: lambda x: tf.constant(np.zeros((3, 6), dtype=np.float32)): [0, 1, 2]} - kgcn = KGCN(3, 2, 5, 6, attr_embedders, edge_output_size=3, node_output_size=3) + thing_embedder = ThingEmbedder(node_types=['a', 'b', 'c'], type_embedding_dim=5, attr_embedding_dim=6, + categorical_attributes={'a': ['a1', 'a2', 'a3'], 'b': ['b1', 'b2', 'b3']}, + continuous_attributes={'c': (0, 1)}) + + role_embedder = RoleEmbedder(num_edge_types=2, type_embedding_dim=5) + + kgcn = KGCN(thing_embedder, role_embedder, edge_output_size=3, node_output_size=3) kgcn(graph, 2) diff --git a/kglib/kgcn/models/embedding.py b/kglib/kgcn/models/embedding.py index 4df818fa..62a9d990 100644 --- a/kglib/kgcn/models/embedding.py +++ b/kglib/kgcn/models/embedding.py @@ -19,10 +19,51 @@ import tensorflow as tf import sonnet as snt + +from kglib.kgcn.models.attribute import CategoricalAttribute, ContinuousAttribute, BlankAttribute from kglib.kgcn.models.typewise import TypewiseEncoder -def common_embedding(features, num_types, type_embedding_dim): +class ThingEmbedder(snt.AbstractModule): + def __init__(self, node_types, type_embedding_dim, attr_embedding_dim, categorical_attributes, + continuous_attributes, name="ThingEmbedder"): + super(ThingEmbedder, self).__init__(name=name) + + self._node_types = node_types + self._type_embedding_dim = type_embedding_dim + self._attr_embedding_dim = attr_embedding_dim + + # Create embedders for the different attribute types + self._attr_embedders = dict() + + if categorical_attributes is not None: + self._attr_embedders.update( + construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes)) + + if continuous_attributes is not None: + self._attr_embedders.update( + construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes)) + + self._attr_embedders.update( + construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, + continuous_attributes)) + + def _build(self, features): + return tf.concat([embed_type(features, len(self._node_types), self._type_embedding_dim), + embed_attribute(features, self._attr_embedders, self._attr_embedding_dim)], axis=1) + + +class RoleEmbedder(snt.AbstractModule): + def __init__(self, num_edge_types, type_embedding_dim, name="RoleEmbedder"): + super(RoleEmbedder, self).__init__(name=name) + self._num_edge_types = num_edge_types + self._type_embedding_dim = type_embedding_dim + + def _build(self, features): + return embed_type(features, self._num_edge_types, self._type_embedding_dim) + + +def embed_type(features, num_types, type_embedding_dim): preexistance_feat = tf.expand_dims(tf.cast(features[:, 0], dtype=tf.float32), axis=1) type_embedder = snt.Embed(num_types, type_embedding_dim) norm = snt.LayerNorm() @@ -31,13 +72,65 @@ def common_embedding(features, num_types, type_embedding_dim): return tf.concat([preexistance_feat, type_embedding], axis=1) -def attribute_embedding(features, attr_encoders, attr_embedding_dim): +def embed_attribute(features, attr_encoders, attr_embedding_dim): typewise_attribute_encoder = TypewiseEncoder(attr_encoders, attr_embedding_dim) attr_embedding = typewise_attribute_encoder(features[:, 1:]) tf.summary.histogram('attribute_embedding_histogram', attr_embedding) return attr_embedding -def node_embedding(features, num_types, type_embedding_dim, attr_encoders, attr_embedding_dim): - return tf.concat([common_embedding(features, num_types, type_embedding_dim), - attribute_embedding(features, attr_encoders, attr_embedding_dim)], axis=1) +def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes): + attr_embedders = dict() + + # Construct attribute embedders + for attribute_type, categories in categorical_attributes.items(): + + attr_typ_index = node_types.index(attribute_type) + + def make_embedder(): + return CategoricalAttribute(len(categories), attr_embedding_dim, + name=attribute_type + '_cat_embedder') + + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] + + return attr_embedders + + +def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes): + attr_embedders = dict() + + # Construct attribute embedders + for attribute_type in continuous_attributes.keys(): + + attr_typ_index = node_types.index(attribute_type) + + def make_embedder(): + return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') + + # Record the embedder, and the index of the type that it should encode + attr_embedders[make_embedder] = [attr_typ_index] + + return attr_embedders + + +def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): + + attribute_names = list(categorical_attributes.keys()) + attribute_names.extend(list(continuous_attributes.keys())) + + non_attribute_nodes = [] + for i, type in enumerate(node_types): + if type not in attribute_names: + non_attribute_nodes.append(i) + + # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does + # nothing. This is provided as a list of their indices + def make_blank_embedder(): + return BlankAttribute(attr_embedding_dim) + + attr_embedders = dict() + + if len(non_attribute_nodes) > 0: + attr_embedders[make_blank_embedder] = non_attribute_nodes + return attr_embedders diff --git a/kglib/kgcn/pipeline/embed_IT.py b/kglib/kgcn/models/embedding_IT.py similarity index 93% rename from kglib/kgcn/pipeline/embed_IT.py rename to kglib/kgcn/models/embedding_IT.py index d032f26b..4f003401 100644 --- a/kglib/kgcn/pipeline/embed_IT.py +++ b/kglib/kgcn/models/embedding_IT.py @@ -16,9 +16,10 @@ # specific language governing permissions and limitations # under the License. # + import unittest -from kglib.kgcn.pipeline.embed import construct_categorical_embedders, construct_continuous_embedders, \ +from kglib.kgcn.models.embedding import construct_categorical_embedders, construct_continuous_embedders, \ construct_non_attribute_embedders @@ -36,7 +37,7 @@ def construct_embedders(node_types, attr_embedding_dim, categorical_attributes, return attr_embedders -class TestConfigureEmbedders(unittest.TestCase): +class TestConstructingEmbedders(unittest.TestCase): def test_all_types_encoded(self): node_types = ['a', 'b', 'c'] @@ -71,3 +72,7 @@ def test_multiple_categorical_embedders(self): for types in attr_embedders.values(): self.assertNotEqual(types, []) + + +if __name__ == "__main__": + unittest.main() diff --git a/kglib/kgcn/models/embedding_test.py b/kglib/kgcn/models/embedding_test.py index 5fd6ac82..69d52b9d 100644 --- a/kglib/kgcn/models/embedding_test.py +++ b/kglib/kgcn/models/embedding_test.py @@ -23,18 +23,18 @@ import tensorflow as tf from unittest.mock import Mock from unittest.mock import patch -from kglib.kgcn.models.embedding import common_embedding, attribute_embedding, node_embedding +from kglib.kgcn.models.embedding import embed_type, embed_attribute from kglib.utils.test.utils import get_call_args -class TestCommonEmbedding(unittest.TestCase): +class TestTypeEmbedding(unittest.TestCase): def setUp(self): tf.enable_eager_execution() def test_embedding_output_shape_as_expected(self): features = np.array([[1, 0, 0.7], [1, 2, 0.7], [0, 1, 0.5]], dtype=np.float32) type_embedding_dim = 5 - output = common_embedding(features, 3, type_embedding_dim) + output = embed_type(features, 3, type_embedding_dim) np.testing.assert_array_equal(np.array([3, 6]), output.shape) @@ -54,7 +54,7 @@ def test_embedding_is_typewise(self): attr_encoders = Mock() attr_embedding_dim = Mock() - attribute_embedding(features, attr_encoders, attr_embedding_dim) # Function under test + embed_attribute(features, attr_encoders, attr_embedding_dim) # Function under test mock_class.assert_called_once_with(attr_encoders, attr_embedding_dim) call_args = get_call_args(mock_instance) @@ -64,37 +64,5 @@ def test_embedding_is_typewise(self): patcher.stop() -class TestNodeEmbedding(unittest.TestCase): - - def setUp(self): - tf.enable_eager_execution() - - def test_embedding_is_typewise(self): - features = Mock() - num_types = Mock() - type_embedding_dim = Mock() - attr_encoders = Mock() - attr_embedding_dim = Mock() - - mock_attribute_embedding = Mock(return_value=np.ones((3, 5))) - - mock_common_embedding = Mock(return_value=np.ones((3, 4))) - - patcher_attr = patch('kglib.kgcn.models.embedding.attribute_embedding', spec=True, - new=mock_attribute_embedding) - patcher_attr.start() - - patcher_common = patch('kglib.kgcn.models.embedding.common_embedding', spec=True, - new=mock_common_embedding) - patcher_common.start() - - embedding = node_embedding(features, num_types, type_embedding_dim, attr_encoders, attr_embedding_dim) - - np.testing.assert_array_equal(np.ones((3, 9)), embedding.numpy()) - - patcher_attr.stop() - patcher_common.stop() - - if __name__ == "__main__": unittest.main() diff --git a/kglib/kgcn/pipeline/embed.py b/kglib/kgcn/pipeline/embed.py deleted file mode 100644 index e042e6ca..00000000 --- a/kglib/kgcn/pipeline/embed.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -from kglib.kgcn.models.attribute import CategoricalAttribute, ContinuousAttribute, BlankAttribute - - -def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes): - attr_embedders = dict() - - # Construct attribute embedders - for attribute_type, categories in categorical_attributes.items(): - - attr_typ_index = node_types.index(attribute_type) - - def make_embedder(): - return CategoricalAttribute(len(categories), attr_embedding_dim, - name=attribute_type + '_cat_embedder') - - # Record the embedder, and the index of the type that it should encode - attr_embedders[make_embedder] = [attr_typ_index] - - return attr_embedders - - -def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes): - attr_embedders = dict() - - # Construct attribute embedders - for attribute_type in continuous_attributes.keys(): - - attr_typ_index = node_types.index(attribute_type) - - def make_embedder(): - return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder') - - # Record the embedder, and the index of the type that it should encode - attr_embedders[make_embedder] = [attr_typ_index] - - return attr_embedders - - -def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes): - - attribute_names = list(categorical_attributes.keys()) - attribute_names.extend(list(continuous_attributes.keys())) - - non_attribute_nodes = [] - for i, type in enumerate(node_types): - if type not in attribute_names: - non_attribute_nodes.append(i) - - # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does - # nothing. This is provided as a list of their indices - def make_blank_embedder(): - return BlankAttribute(attr_embedding_dim) - - attr_embedders = dict() - - if len(non_attribute_nodes) > 0: - attr_embedders[make_blank_embedder] = non_attribute_nodes - return attr_embedders \ No newline at end of file diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index 02d30a63..8d73f372 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -23,8 +23,7 @@ from kglib.kgcn.learn.learn import KGCNLearner from kglib.kgcn.models.core import softmax, KGCN -from kglib.kgcn.pipeline.embed import construct_categorical_embedders, construct_continuous_embedders, \ - construct_non_attribute_embedders +from kglib.kgcn.models.embedding import ThingEmbedder, RoleEmbedder from kglib.kgcn.pipeline.encode import encode_types, create_input_graph, create_target_graph, encode_values from kglib.kgcn.pipeline.utils import apply_logits_to_graphs, duplicate_edges_in_reverse from kglib.kgcn.plot.plotting import plot_across_training, plot_predictions @@ -72,23 +71,13 @@ def pipeline(graphs, # Build and run the KGCN ############################################################ - # Create embedders for the different attribute types - attr_embedders = dict() + thing_embedder = ThingEmbedder(node_types, type_embedding_dim, attr_embedding_dim, categorical_attributes, + continuous_attributes) - if categorical_attributes is not None: - attr_embedders.update(construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes)) + role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim) - if continuous_attributes is not None: - attr_embedders.update(construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes)) - - attr_embedders.update(construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, - continuous_attributes)) - - kgcn = KGCN(len(node_types), - len(edge_types), - type_embedding_dim, - attr_embedding_dim, - attr_embedders, + kgcn = KGCN(thing_embedder, + role_embedder, edge_output_size=edge_output_size, node_output_size=node_output_size) From beb5769c5e98fd1540b590165ed2f0ba0e24d18c Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 5 Dec 2019 22:53:23 +0000 Subject: [PATCH 4/4] Fix learning integration test --- kglib/kgcn/learn/learn_IT.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kglib/kgcn/learn/learn_IT.py b/kglib/kgcn/learn/learn_IT.py index 78fef552..8718bb2b 100644 --- a/kglib/kgcn/learn/learn_IT.py +++ b/kglib/kgcn/learn/learn_IT.py @@ -25,6 +25,7 @@ from kglib.kgcn.learn.learn import KGCNLearner from kglib.kgcn.models.attribute import BlankAttribute from kglib.kgcn.models.core import KGCN +from kglib.kgcn.models.embedding import ThingEmbedder, RoleEmbedder class ITKGCNLearner(unittest.TestCase): @@ -47,10 +48,12 @@ def test_learner_runs(self): target_graph.add_node(2, type='company', features=np.array([0, 1, 0], dtype=np.float32)) target_graph.graph['features'] = np.zeros(5, dtype=np.float32) - attr_embedding_dim = 6 - attr_embedders = {lambda: BlankAttribute(attr_embedding_dim): [0, 1, 2]} + thing_embedder = ThingEmbedder(node_types=['person', 'employment', 'employee'], type_embedding_dim=5, + attr_embedding_dim=6, categorical_attributes={}, continuous_attributes={}) - kgcn = KGCN(3, 2, 5, attr_embedding_dim, attr_embedders, edge_output_size=3, node_output_size=3) + role_embedder = RoleEmbedder(num_edge_types=2, type_embedding_dim=5) + + kgcn = KGCN(thing_embedder, role_embedder, edge_output_size=3, node_output_size=3) learner = KGCNLearner(kgcn, num_processing_steps_tr=2, num_processing_steps_ge=2)