Skip to content
This repository has been archived by the owner on Nov 18, 2023. It is now read-only.

Refactor Attribute Embedder Construction #110

Merged
merged 4 commits into from
Dec 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ __pycache__/

# Data input/output directories
dataset/
kglib/kgcn/examples/diagnosis/events/
9 changes: 6 additions & 3 deletions kglib/kgcn/learn/learn_IT.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from kglib.kgcn.learn.learn import KGCNLearner
from kglib.kgcn.models.attribute import BlankAttribute
from kglib.kgcn.models.core import KGCN
from kglib.kgcn.models.embedding import ThingEmbedder, RoleEmbedder


class ITKGCNLearner(unittest.TestCase):
Expand All @@ -47,10 +48,12 @@ def test_learner_runs(self):
target_graph.add_node(2, type='company', features=np.array([0, 1, 0], dtype=np.float32))
target_graph.graph['features'] = np.zeros(5, dtype=np.float32)

attr_embedding_dim = 6
attr_embedders = {lambda: BlankAttribute(attr_embedding_dim): [0, 1, 2]}
thing_embedder = ThingEmbedder(node_types=['person', 'employment', 'employee'], type_embedding_dim=5,
attr_embedding_dim=6, categorical_attributes={}, continuous_attributes={})

kgcn = KGCN(3, 2, 5, attr_embedding_dim, attr_embedders, edge_output_size=3, node_output_size=3)
role_embedder = RoleEmbedder(num_edge_types=2, type_embedding_dim=5)

kgcn = KGCN(thing_embedder, role_embedder, edge_output_size=3, node_output_size=3)

learner = KGCNLearner(kgcn, num_processing_steps_tr=2, num_processing_steps_ge=2)

Expand Down
10 changes: 10 additions & 0 deletions kglib/kgcn/models/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ py_test(
]
)

py_test(
name = "embedding_IT",
srcs = [
"embedding_IT.py"
],
deps = [
"models"
]
)

py_test(
name = "typewise_test",
srcs = [
Expand Down
32 changes: 7 additions & 25 deletions kglib/kgcn/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,12 @@
# under the License.
#

from functools import partial

import numpy as np
import sonnet as snt
from graph_nets import modules
from graph_nets import utils_tf
from graph_nets.modules import GraphIndependent

from kglib.kgcn.models.embedding import common_embedding, node_embedding


def softmax(x):
return np.exp(x) / np.sum(np.exp(x))
Expand Down Expand Up @@ -81,23 +77,18 @@ class KGCN(snt.AbstractModule):
"""

def __init__(self,
num_node_types,
num_edge_types,
type_embedding_dim,
attr_embedding_dim,
attr_embedders,
thing_embedder,
role_embedder,
edge_output_size=3,
node_output_size=3,
latent_size=16,
num_layers=2,
name="KGCN"):
super(KGCN, self).__init__(name=name)

self._num_node_types = num_node_types
self._num_edge_types = num_edge_types
self._type_embedding_dim = type_embedding_dim
self._attr_embedding_dim = attr_embedding_dim
self._attr_embedders = attr_embedders
self._thing_embedder = thing_embedder
self._role_embedder = role_embedder

self._latent_size = latent_size
self._num_layers = num_layers

Expand All @@ -117,21 +108,12 @@ def __init__(self,
self._output_transform = modules.GraphIndependent(edge_fn, node_fn, None)

def _edge_model(self):
common_embedding_module = snt.Module(
partial(common_embedding, num_types=self._num_edge_types,
type_embedding_dim=self._type_embedding_dim)
)

return snt.Sequential([common_embedding_module,
return snt.Sequential([self._role_embedder,
snt.nets.MLP([self._latent_size] * self._num_layers, activate_final=True),
snt.LayerNorm()])

def _node_model(self):
node_embedding_module = snt.Module(
partial(node_embedding, num_types=self._num_node_types, type_embedding_dim=self._type_embedding_dim,
attr_encoders=self._attr_embedders, attr_embedding_dim=self._attr_embedding_dim)
)
return snt.Sequential([node_embedding_module,
return snt.Sequential([self._thing_embedder,
snt.nets.MLP([self._latent_size] * self._num_layers, activate_final=True),
snt.LayerNorm()])

Expand Down
10 changes: 8 additions & 2 deletions kglib/kgcn/models/core_IT.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from graph_nets.graphs import GraphsTuple

from kglib.kgcn.models.core import KGCN
from kglib.kgcn.models.embedding import ThingEmbedder, RoleEmbedder


class ITKGCN(unittest.TestCase):
Expand All @@ -39,8 +40,13 @@ def test_kgcn_runs(self):
n_node=tf.convert_to_tensor(np.array([3], dtype=np.int32)),
n_edge=tf.convert_to_tensor(np.array([2], dtype=np.int32)))

attr_embedders = {lambda: lambda x: tf.constant(np.zeros((3, 6), dtype=np.float32)): [0, 1, 2]}
kgcn = KGCN(3, 2, 5, 6, attr_embedders, edge_output_size=3, node_output_size=3)
thing_embedder = ThingEmbedder(node_types=['a', 'b', 'c'], type_embedding_dim=5, attr_embedding_dim=6,
categorical_attributes={'a': ['a1', 'a2', 'a3'], 'b': ['b1', 'b2', 'b3']},
continuous_attributes={'c': (0, 1)})

role_embedder = RoleEmbedder(num_edge_types=2, type_embedding_dim=5)

kgcn = KGCN(thing_embedder, role_embedder, edge_output_size=3, node_output_size=3)

kgcn(graph, 2)

Expand Down
103 changes: 98 additions & 5 deletions kglib/kgcn/models/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,51 @@

import tensorflow as tf
import sonnet as snt

from kglib.kgcn.models.attribute import CategoricalAttribute, ContinuousAttribute, BlankAttribute
from kglib.kgcn.models.typewise import TypewiseEncoder


def common_embedding(features, num_types, type_embedding_dim):
class ThingEmbedder(snt.AbstractModule):
    """Embeds node ("thing") feature vectors by concatenating a type embedding
    with a per-type attribute embedding.

    One attribute-embedder factory is registered per categorical and continuous
    attribute type, plus a blank embedder covering all non-attribute node types,
    so that every node-type index has an embedder.
    """

    def __init__(self, node_types, type_embedding_dim, attr_embedding_dim, categorical_attributes,
                 continuous_attributes, name="ThingEmbedder"):
        """
        Args:
            node_types: ordered list of node type labels; the list index doubles
                as the type's integer id
            type_embedding_dim: output size of the type embedding
            attr_embedding_dim: output size of every attribute embedder
            categorical_attributes: dict of attribute type -> list of categories, or None
            continuous_attributes: dict of attribute type -> range tuple, or None
                NOTE(review): None values flow unguarded into
                construct_non_attribute_embedders, which calls .keys() on them —
                confirm callers always pass dicts here
            name: sonnet module name
        """
        super(ThingEmbedder, self).__init__(name=name)

        self._node_types = node_types
        self._type_embedding_dim = type_embedding_dim
        self._attr_embedding_dim = attr_embedding_dim

        # Create embedders for the different attribute types; each entry maps a
        # zero-arg factory to the list of node-type indices it should encode
        self._attr_embedders = dict()

        if categorical_attributes is not None:
            self._attr_embedders.update(
                construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes))

        if continuous_attributes is not None:
            self._attr_embedders.update(
                construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes))

        # Non-attribute types get a blank embedder of matching output dimension
        self._attr_embedders.update(
            construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes,
                                              continuous_attributes))

    def _build(self, features):
        # embed_type reads column 0 (pre-existence flag) plus the type id;
        # embed_attribute consumes features[:, 1:] (the raw attribute values)
        return tf.concat([embed_type(features, len(self._node_types), self._type_embedding_dim),
                          embed_attribute(features, self._attr_embedders, self._attr_embedding_dim)], axis=1)


class RoleEmbedder(snt.AbstractModule):
    """Embeds role (edge) feature vectors using only a type embedding."""

    def __init__(self, num_edge_types, type_embedding_dim, name="RoleEmbedder"):
        """
        Args:
            num_edge_types: number of distinct edge (role) types
            type_embedding_dim: output size of the type embedding
            name: sonnet module name
        """
        super(RoleEmbedder, self).__init__(name=name)
        self._num_edge_types = num_edge_types
        self._type_embedding_dim = type_embedding_dim

    def _build(self, features):
        # Delegate straight to the shared type-embedding helper
        embedded = embed_type(features, self._num_edge_types, self._type_embedding_dim)
        return embedded


def embed_type(features, num_types, type_embedding_dim):
preexistance_feat = tf.expand_dims(tf.cast(features[:, 0], dtype=tf.float32), axis=1)
type_embedder = snt.Embed(num_types, type_embedding_dim)
norm = snt.LayerNorm()
Expand All @@ -31,13 +72,65 @@ def common_embedding(features, num_types, type_embedding_dim):
return tf.concat([preexistance_feat, type_embedding], axis=1)


def attribute_embedding(features, attr_encoders, attr_embedding_dim):
def embed_attribute(features, attr_encoders, attr_embedding_dim):
    """Embed the attribute portion of `features` (columns 1 onward) typewise.

    Args:
        features: feature matrix whose columns from index 1 hold attribute values
        attr_encoders: dict mapping embedder factories to the type indices they encode
        attr_embedding_dim: output size of the attribute embedding

    Returns:
        The typewise attribute embedding tensor (also logged as a summary histogram).
    """
    encoder = TypewiseEncoder(attr_encoders, attr_embedding_dim)
    attribute_features = features[:, 1:]
    embedding = encoder(attribute_features)
    tf.summary.histogram('attribute_embedding_histogram', embedding)
    return embedding


def node_embedding(features, num_types, type_embedding_dim, attr_encoders, attr_embedding_dim):
return tf.concat([common_embedding(features, num_types, type_embedding_dim),
attribute_embedding(features, attr_encoders, attr_embedding_dim)], axis=1)
def construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes):
    """Build one CategoricalAttribute embedder factory per categorical attribute type.

    Args:
        node_types: ordered list of node type labels; the list index is the type's integer id
        attr_embedding_dim: output size of each embedder
        categorical_attributes: dict of attribute type label -> list of categories

    Returns:
        Dict mapping each embedder factory (zero-arg callable) to a one-element list
        holding the index of the node type it encodes.

    Raises:
        ValueError: if an attribute type is not present in `node_types`.
    """
    attr_embedders = dict()

    # Construct attribute embedders
    for attribute_type, categories in categorical_attributes.items():

        attr_typ_index = node_types.index(attribute_type)

        # Bind the loop variables as defaults: a plain closure would late-bind, so with
        # multiple categorical attributes every factory called after the loop would build
        # an embedder for the *last* attribute's categories and name
        def make_embedder(categories=categories, attribute_type=attribute_type):
            return CategoricalAttribute(len(categories), attr_embedding_dim,
                                        name=attribute_type + '_cat_embedder')

        # Record the embedder, and the index of the type that it should encode
        attr_embedders[make_embedder] = [attr_typ_index]

    return attr_embedders


def construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes):
    """Build one ContinuousAttribute embedder factory per continuous attribute type.

    Args:
        node_types: ordered list of node type labels; the list index is the type's integer id
        attr_embedding_dim: output size of each embedder
        continuous_attributes: dict of attribute type label -> value range (only keys are used)

    Returns:
        Dict mapping each embedder factory (zero-arg callable) to a one-element list
        holding the index of the node type it encodes.

    Raises:
        ValueError: if an attribute type is not present in `node_types`.
    """
    attr_embedders = dict()

    # Construct attribute embedders
    for attribute_type in continuous_attributes.keys():

        attr_typ_index = node_types.index(attribute_type)

        # Bind attribute_type as a default: a plain closure would late-bind, so with
        # multiple continuous attributes every factory called after the loop would use
        # the *last* attribute's name.
        # NOTE(review): the '_cat_embedder' suffix looks like a copy-paste from the
        # categorical case; kept byte-identical because it is a runtime module name
        def make_embedder(attribute_type=attribute_type):
            return ContinuousAttribute(attr_embedding_dim, name=attribute_type + '_cat_embedder')

        # Record the embedder, and the index of the type that it should encode
        attr_embedders[make_embedder] = [attr_typ_index]

    return attr_embedders


def construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes):
    """Build the blank embedder covering every node type that is not an attribute.

    All entities and relations (non-attributes) also need an embedder with matching
    output dimension, which does nothing; it is registered against the list of their
    type indices.

    Args:
        node_types: ordered list of node type labels; the list index is the type's integer id
        attr_embedding_dim: output size of the blank embedder
        categorical_attributes: dict of categorical attribute type -> categories, or None
        continuous_attributes: dict of continuous attribute type -> range, or None

    Returns:
        Dict with at most one entry: a blank-embedder factory mapped to the indices of
        all non-attribute node types; empty when every node type is an attribute.
    """
    # Tolerate None (meaning "no attributes of this kind") — callers such as
    # ThingEmbedder only None-guard the categorical/continuous constructors and pass
    # the raw values straight through to this function
    attribute_names = set(categorical_attributes or ())
    attribute_names.update(continuous_attributes or ())

    non_attribute_nodes = [index for index, node_type in enumerate(node_types)
                           if node_type not in attribute_names]

    def make_blank_embedder():
        return BlankAttribute(attr_embedding_dim)

    if non_attribute_nodes:
        return {make_blank_embedder: non_attribute_nodes}
    return {}
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,37 @@
# specific language governing permissions and limitations
# under the License.
#

import unittest

from kglib.kgcn.pipeline.pipeline import configure_embedders
from kglib.kgcn.models.embedding import construct_categorical_embedders, construct_continuous_embedders, \
construct_non_attribute_embedders


def construct_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes):
    """Assemble the full embedder-factory dict: categorical, continuous, and blank
    (non-attribute) embedders, keyed by factory and mapped to node-type indices."""
    attr_embedders = dict()

    if categorical_attributes is not None:
        categorical = construct_categorical_embedders(node_types, attr_embedding_dim, categorical_attributes)
        attr_embedders.update(categorical)

    if continuous_attributes is not None:
        continuous = construct_continuous_embedders(node_types, attr_embedding_dim, continuous_attributes)
        attr_embedders.update(continuous)

    non_attribute = construct_non_attribute_embedders(node_types, attr_embedding_dim, categorical_attributes,
                                                      continuous_attributes)
    attr_embedders.update(non_attribute)
    return attr_embedders


class TestConfigureEmbedders(unittest.TestCase):
class TestConstructingEmbedders(unittest.TestCase):

def test_all_types_encoded(self):
node_types = ['a', 'b', 'c']
attr_embedding_dim = 5
categorical_attributes = {'a': ['option1', 'option2']}
continuous_attributes = {'b': (0, 1)}
attr_embedders = configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes)

attr_embedders = construct_embedders(node_types, attr_embedding_dim, categorical_attributes,
continuous_attributes)
all_types = [l for el in list(attr_embedders.values()) for l in el]

expected_types = [0, 1, 2]
Expand All @@ -40,7 +58,10 @@ def test_multiple_categorical_embedders(self):
attr_embedding_dim = 5
categorical_attributes = {'a': ['option1', 'option2'], 'c': ['option3', 'option4']}
continuous_attributes = {'b': (0, 1)}
attr_embedders = configure_embedders(node_types, attr_embedding_dim, categorical_attributes, continuous_attributes)

attr_embedders = construct_embedders(node_types, attr_embedding_dim, categorical_attributes,
continuous_attributes)

all_types = [l for el in list(attr_embedders.values()) for l in el]
all_types.sort()

Expand All @@ -51,3 +72,7 @@ def test_multiple_categorical_embedders(self):

for types in attr_embedders.values():
self.assertNotEqual(types, [])


if __name__ == "__main__":
unittest.main()
40 changes: 4 additions & 36 deletions kglib/kgcn/models/embedding_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@
import tensorflow as tf
from unittest.mock import Mock
from unittest.mock import patch
from kglib.kgcn.models.embedding import common_embedding, attribute_embedding, node_embedding
from kglib.kgcn.models.embedding import embed_type, embed_attribute
from kglib.utils.test.utils import get_call_args


class TestCommonEmbedding(unittest.TestCase):
class TestTypeEmbedding(unittest.TestCase):
def setUp(self):
tf.enable_eager_execution()

def test_embedding_output_shape_as_expected(self):
features = np.array([[1, 0, 0.7], [1, 2, 0.7], [0, 1, 0.5]], dtype=np.float32)
type_embedding_dim = 5
output = common_embedding(features, 3, type_embedding_dim)
output = embed_type(features, 3, type_embedding_dim)

np.testing.assert_array_equal(np.array([3, 6]), output.shape)

Expand All @@ -54,7 +54,7 @@ def test_embedding_is_typewise(self):
attr_encoders = Mock()
attr_embedding_dim = Mock()

attribute_embedding(features, attr_encoders, attr_embedding_dim) # Function under test
embed_attribute(features, attr_encoders, attr_embedding_dim) # Function under test

mock_class.assert_called_once_with(attr_encoders, attr_embedding_dim)
call_args = get_call_args(mock_instance)
Expand All @@ -64,37 +64,5 @@ def test_embedding_is_typewise(self):
patcher.stop()


class TestNodeEmbedding(unittest.TestCase):

def setUp(self):
tf.enable_eager_execution()

def test_embedding_is_typewise(self):
features = Mock()
num_types = Mock()
type_embedding_dim = Mock()
attr_encoders = Mock()
attr_embedding_dim = Mock()

mock_attribute_embedding = Mock(return_value=np.ones((3, 5)))

mock_common_embedding = Mock(return_value=np.ones((3, 4)))

patcher_attr = patch('kglib.kgcn.models.embedding.attribute_embedding', spec=True,
new=mock_attribute_embedding)
patcher_attr.start()

patcher_common = patch('kglib.kgcn.models.embedding.common_embedding', spec=True,
new=mock_common_embedding)
patcher_common.start()

embedding = node_embedding(features, num_types, type_embedding_dim, attr_encoders, attr_embedding_dim)

np.testing.assert_array_equal(np.ones((3, 9)), embedding.numpy())

patcher_attr.stop()
patcher_common.stop()


if __name__ == "__main__":
unittest.main()
Loading