From 9a55bed79b6f1228f60d0a1ac01690b4ae1615d8 Mon Sep 17 00:00:00 2001
From: MoFHeka
Date: Sat, 15 Jun 2024 03:34:19 +0800
Subject: [PATCH 1/2] [feat] Add new setting num_of_buckets_per_alloc from HKV
 beta 12. It might improve performance of memory access. And this feature
 also reduces unnecessary BFC reallocation information shown to the user when
 CUDA OOM.

---
 WORKSPACE                                          |  6 +++---
 .../core/kernels/lookup_impl/lookup_table_op_hkv.h | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 92f96379c..dade0d058 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -57,9 +57,9 @@ http_archive(
 http_archive(
     name = "hkv",
     build_file = "//build_deps/toolchains/hkv:hkv.BUILD",
-    sha256 = "0535477e7cd82affa940468647c07caacd54d42a2c775cfdf199b3aa6e4f87a6",
-    strip_prefix = "HierarchicalKV-0.1.0-beta.11",
-    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.11.tar.gz",
+    sha256 = "a73d7bea159173db2038f7c5215a7d1fbd5362adfb232fabde206dc64a1e817c",
+    strip_prefix = "HierarchicalKV-0.1.0-beta.12",
+    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.12.tar.gz",
 )
 
 tf_configure(
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
index 41f999396..8931814b2 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
@@ -472,6 +472,26 @@ class TableWrapper {
     }
     step_per_epoch_ = init_options.step_per_epoch;
     mkv_options_.reserved_key_start_bit = init_options.reserved_key_start_bit;
+    // Derive num_of_buckets_per_alloc: use 512 when init_capacity exceeds
+    // max_bucket_size * 512, otherwise the largest power of two n with
+    // max_bucket_size * n <= init_capacity (falling back to 1).
+    static constexpr size_t default_chunk_buckets = 512;
+    size_t min_chunk_buckets = 1;
+    // Compare via a right shift of the capacity rather than
+    // `max_bucket_size * (1 << pow_n)`: shifting the int literal 1 by up to
+    // 63 bits is undefined behavior and the product could wrap around.
+    for (size_t pow_n = 1; pow_n <= 63; ++pow_n) {
+      if ((mkv_options_.init_capacity >> pow_n) <
+          mkv_options_.max_bucket_size) {
+        min_chunk_buckets = size_t{1} << (pow_n - 1);
+        break;
+      }
+    }
+    mkv_options_.num_of_buckets_per_alloc =
+        mkv_options_.init_capacity >
+                (mkv_options_.max_bucket_size * default_chunk_buckets)
+            ? default_chunk_buckets
+            : min_chunk_buckets;
     curr_epoch_ = 0;
     curr_step_ = 1;
 

From 743cc4b10993aec388c7bcb0174e6bff89b90f6a Mon Sep 17 00:00:00 2001
From: MoFHeka
Date: Sat, 15 Jun 2024 03:35:04 +0800
Subject: [PATCH 2/2] [fix] Missing Bucketize class in DE keras horovod demo.

---
 .../movielens-1m-keras-with-horovod.py        | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py b/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
index ed29ce058..07a6a9094 100644
--- a/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
+++ b/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
@@ -184,6 +184,25 @@ def embedding_out_split(embedding_out_concat, input_split_dims):
   return embedding_out
 
 
+class Bucketize(tf.keras.layers.Layer):
+
+  def __init__(self, boundaries, **kwargs):
+    self.boundaries = boundaries
+    super(Bucketize, self).__init__(**kwargs)
+
+  def build(self, input_shape):
+    # Be sure to call this somewhere!
+    super(Bucketize, self).build(input_shape)
+
+  def call(self, x, **kwargs):
+    return tf.raw_ops.Bucketize(input=x, boundaries=self.boundaries)
+
+  def get_config(self,):
+    config = {'boundaries': self.boundaries}
+    base_config = super(Bucketize, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
+
 class ChannelEmbeddingLayers(tf.keras.layers.Layer):
 
   def __init__(self,