-
Notifications
You must be signed in to change notification settings - Fork 187
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add MultiHeadAttention model option to xm_launcher
PiperOrigin-RevId: 493797031
- Loading branch information
1 parent
dd0d1a2
commit ed9fa45
Showing 8 changed files with 341 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright 2022 The TensorFlow GNN Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""ConfigDict for Multi-Head Attention.""" | ||
|
||
from ml_collections import config_dict | ||
import tensorflow as tf | ||
from tensorflow_gnn.models.multi_head_attention import layers | ||
|
||
|
||
def graph_update_get_config_dict() -> config_dict.ConfigDict:
  """Returns a ConfigDict of defaults for graph_update_from_config_dict().

  Field defaults are kept in sync with the keyword arguments of
  `MultiHeadAttentionMPNNGraphUpdate.__init__`. Required arguments start out
  as typed `None` placeholders that callers must fill in before use.
  """
  cfg = config_dict.ConfigDict()
  # Required arguments: typed placeholders, i.e. Optional[int] set to None.
  for required_field in ("units", "message_dim", "num_heads", "receiver_tag"):
    setattr(cfg, required_field, config_dict.placeholder(int))
  # Optional arguments, with the library's default values.
  cfg.l2_regularization = 0.0
  cfg.edge_dropout_rate = 0.0
  cfg.state_dropout_rate = 0.0
  cfg.conv_activation = "relu"
  cfg.activation = "relu"
  cfg.lock()  # Reject assignments to misspelled field names.
  return cfg
|
||
|
||
def graph_update_from_config_dict(
    cfg: config_dict.ConfigDict) -> tf.keras.layers.Layer:
  """Returns a MultiHeadAttentionMPNNGraphUpdate initialized from `cfg`.

  Args:
    cfg: A `ConfigDict` with the fields defined by
      `graph_update_get_config_dict()`. All fields with non-`None` values are
      used as keyword arguments for initializing and returning a
      `MultiHeadAttentionMPNNGraphUpdate` object. For the required arguments
      of `MultiHeadAttentionMPNNGraphUpdate.__init__`, users must set a value
      in `cfg` before passing it here.

  Returns:
    A new `MultiHeadAttentionMPNNGraphUpdate` object.

  Raises:
    TypeError: if `cfg` fails to supply a required argument for
      `MultiHeadAttentionMPNNGraphUpdate.__init__`.
  """
  init_kwargs = {}
  for field_name, field_value in cfg.items():
    # Placeholders left at None are omitted, so __init__ applies its own
    # defaults (or raises TypeError for required arguments).
    if field_value is not None:
      init_kwargs[field_name] = field_value
  return layers.MultiHeadAttentionMPNNGraphUpdate(**init_kwargs)
87 changes: 87 additions & 0 deletions
87
tensorflow_gnn/models/multi_head_attention/config_dict_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Copyright 2022 The TensorFlow GNN Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""Tests for config_dict.""" | ||
|
||
import json | ||
from typing import Mapping | ||
|
||
import tensorflow as tf | ||
import tensorflow_gnn as tfgnn | ||
from tensorflow_gnn.models.multi_head_attention import config_dict as multi_head_attention_config_dict | ||
from tensorflow_gnn.models.multi_head_attention import layers | ||
|
||
|
||
class ConfigDictTest(tf.test.TestCase):

  def test_graph_update_defaults(self):
    """A config with only the required fields set matches direct construction."""
    hparams = dict(
        units=32,
        message_dim=16,
        num_heads=4,
        receiver_tag=tfgnn.SOURCE)

    cfg = multi_head_attention_config_dict.graph_update_get_config_dict()
    for field_name, field_value in hparams.items():
      setattr(cfg, field_name, field_value)
    actual = multi_head_attention_config_dict.graph_update_from_config_dict(cfg)

    expected = layers.MultiHeadAttentionMPNNGraphUpdate(**hparams)

    self.assertEqual(to_model_config(actual), to_model_config(expected))
|
||
|
||
def to_model_config(layer: tf.keras.layers.Layer):
  """Returns a parsed model config for `layer`, without `"name"` fields."""
  # Serialization is recursive only for a full model, so wrap the layer.
  model = tf.keras.Sequential([layer])
  # Sub-objects are only built in the first call, so call the model once.
  _ = model(_make_test_graph_loop())
  parsed_config = json.loads(model.to_json())
  # Layer names are uniquified and would make the hparam comparison fail,
  # so strip them before returning.
  return _remove_names(parsed_config)
|
||
|
||
def _remove_names(obj): | ||
"""Returns parsed JSON `obj` without dict entries called "name".""" | ||
if isinstance(obj, Mapping): | ||
return {k: _remove_names(v) for k, v in obj.items() if k != "name"} | ||
elif isinstance(obj, (list, tuple)): | ||
return type(obj)([_remove_names(v) for v in obj]) | ||
else: | ||
return obj | ||
|
||
|
||
def _make_test_graph_loop():
  """Returns a scalar GraphTensor with one node and one self-loop edge."""
  node_sets = {
      "nodes": tfgnn.NodeSet.from_fields(
          sizes=tf.constant([1]),
          features={tfgnn.HIDDEN_STATE: tf.constant([[1.]])}),
  }
  # A single edge from node 0 back to node 0.
  edge_sets = {
      "edges": tfgnn.EdgeSet.from_fields(
          sizes=tf.constant([1]),
          adjacency=tfgnn.Adjacency.from_indices(
              ("nodes", tf.constant([0])),
              ("nodes", tf.constant([0])))),
  }
  return tfgnn.GraphTensor.from_pieces(
      node_sets=node_sets, edge_sets=edge_sets)
|
||
|
||
# Allow running this test file directly as a script.
if __name__ == "__main__":
  tf.test.main()
74 changes: 74 additions & 0 deletions
74
tensorflow_gnn/models/multi_head_attention/hparams_vizier.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Copyright 2022 The TensorFlow GNN Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""Hyperparameter search spaces for Vizier studies. | ||
This file defines search spaces for hyperparameter tuning of the | ||
MultiHeadAttention model architecture with https://github.com/google/vizier. | ||
End-to-end models built with MultiHeadAttention can use this to configure and | ||
launch a Vizier study and the training runs for its trials. It's up to them how | ||
to forward Vizier params to the training script and its use of | ||
MultiHeadAttention. The parameter names set here for Vizier match the keyword | ||
arguments in the Python modeling code. | ||
For each search space definition, this file has a function | ||
``` | ||
add_params_<name>(search_space) | ||
``` | ||
that modifies `search_space` in-place by adding parameters and returns `None`. | ||
""" | ||
|
||
from vizier.service import pyvizier as vz | ||
|
||
|
||
def add_params_regularization(search_space: vz.SearchSpace,
                              *, prefix: str = "") -> None:
  """Adds params for a study of regularization strength.

  Args:
    search_space: a `pyvizier.SearchSpace` that is changed in-place by adding
      `state_dropout_rate`, `edge_dropout_rate` and `l2_regularization`.
    prefix: a prefix added to param names.
  """
  # Params added at the root apply to all trials in the Vizier study.
  # go/pyvizier also lets you add conditional params.
  root = search_space.root
  dropout_values = [.1, .2, .3]
  for dropout_name in ("state_dropout_rate", "edge_dropout_rate"):
    root.add_discrete_param(
        prefix + dropout_name, dropout_values,
        scale_type=vz.ScaleType.LINEAR)
  root.add_float_param(
      prefix + "l2_regularization", 1e-6, 1e-4,
      scale_type=vz.ScaleType.LOG)
|
||
|
||
def add_params_attention(search_space: vz.SearchSpace,
                         *, prefix: str = "") -> None:
  """Adds params for a study of attention configurations.

  Args:
    search_space: a `pyvizier.SearchSpace` that is changed in-place by adding
      `num_heads`.
    prefix: a prefix added to param names.
  """
  # Params added at the root apply to all trials in the Vizier study.
  # go/pyvizier also lets you add conditional params.
  head_counts = [2, 4, 8]
  search_space.root.add_discrete_param(
      prefix + "num_heads", head_counts,
      scale_type=vz.ScaleType.LINEAR)
44 changes: 44 additions & 0 deletions
44
tensorflow_gnn/models/multi_head_attention/hparams_vizier_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2022 The TensorFlow GNN Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""Tests for hparams_vizier.""" | ||
|
||
from absl.testing import absltest | ||
from tensorflow_gnn.models.multi_head_attention import hparams_vizier | ||
|
||
from vizier.service import pyvizier as vz | ||
|
||
|
||
class HparamsVizierTest(absltest.TestCase):

  def _param_names(self, problem):
    """Returns the names of all params in the problem's search space."""
    return [p.name for p in problem.search_space.parameters]

  def test_regularization(self):
    problem = vz.ProblemStatement()
    hparams_vizier.add_params_regularization(
        problem.search_space, prefix="foo.")
    self.assertCountEqual(
        self._param_names(problem),
        ["foo.state_dropout_rate", "foo.edge_dropout_rate",
         "foo.l2_regularization"])

  def test_attention(self):
    problem = vz.ProblemStatement()
    hparams_vizier.add_params_attention(problem.search_space, prefix="foo.")
    self.assertCountEqual(self._param_names(problem), ["foo.num_heads"])
|
||
|
||
# Allow running this test file directly as a script.
if __name__ == "__main__":
  absltest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.