From 9a55bed79b6f1228f60d0a1ac01690b4ae1615d8 Mon Sep 17 00:00:00 2001
From: MoFHeka <mofhejia@163.com>
Date: Sat, 15 Jun 2024 03:34:19 +0800
Subject: [PATCH 1/2] [feat] Add new setting num_of_buckets_per_alloc from HKV
 beta 12. It might improve memory access performance. This feature also
 reduces the unnecessary BFC reallocation information reported to the user on
 CUDA OOM.

---
 WORKSPACE                                          |  6 +++---
 .../core/kernels/lookup_impl/lookup_table_op_hkv.h | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 92f96379c..dade0d058 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -57,9 +57,9 @@ http_archive(
 http_archive(
     name = "hkv",
     build_file = "//build_deps/toolchains/hkv:hkv.BUILD",
-    sha256 = "0535477e7cd82affa940468647c07caacd54d42a2c775cfdf199b3aa6e4f87a6",
-    strip_prefix = "HierarchicalKV-0.1.0-beta.11",
-    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.11.tar.gz",
+    sha256 = "a73d7bea159173db2038f7c5215a7d1fbd5362adfb232fabde206dc64a1e817c",
+    strip_prefix = "HierarchicalKV-0.1.0-beta.12",
+    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.12.tar.gz",
 )
 
 tf_configure(
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
index 41f999396..8931814b2 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h
@@ -472,6 +472,20 @@ class TableWrapper {
     }
     step_per_epoch_ = init_options.step_per_epoch;
     mkv_options_.reserved_key_start_bit = init_options.reserved_key_start_bit;
+    // Buckets per HKV allocation: 512 for big tables, else the largest power
+    // of two that fits. Shifts use size_t; int `1 << n` is UB for n >= 31.
+    static constexpr size_t default_chunk_buckets = 512;
+    const size_t bucket_size = mkv_options_.max_bucket_size;
+    const size_t capacity = mkv_options_.init_capacity;
+    size_t min_chunk_buckets = 1;
+    for (size_t pow_n = 1; pow_n < 8 * sizeof(size_t); ++pow_n) {
+      if (bucket_size * (size_t{1} << pow_n) <= capacity) continue;
+      min_chunk_buckets = size_t{1} << (pow_n - 1);
+      break;
+    }
+    mkv_options_.num_of_buckets_per_alloc =
+        capacity > bucket_size * default_chunk_buckets ? default_chunk_buckets
+                                                       : min_chunk_buckets;
     curr_epoch_ = 0;
     curr_step_ = 1;
 

From 743cc4b10993aec388c7bcb0174e6bff89b90f6a Mon Sep 17 00:00:00 2001
From: MoFHeka <mofhejia@163.com>
Date: Sat, 15 Jun 2024 03:35:04 +0800
Subject: [PATCH 2/2] [fix] Missing Bucketize class in DE keras horovod demo.

---
 .../movielens-1m-keras-with-horovod.py        | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py b/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
index ed29ce058..07a6a9094 100644
--- a/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
+++ b/demo/dynamic_embedding/movielens-1m-keras-with-horovod/movielens-1m-keras-with-horovod.py
@@ -184,6 +184,25 @@ def embedding_out_split(embedding_out_concat, input_split_dims):
   return embedding_out
 
 
+class Bucketize(tf.keras.layers.Layer):
+
+  def __init__(self, boundaries, **kwargs):
+    self.boundaries = boundaries
+    super(Bucketize, self).__init__(**kwargs)
+
+  def build(self, input_shape):
+    # Be sure to call this somewhere!
+    super(Bucketize, self).build(input_shape)
+
+  def call(self, x, **kwargs):
+    return tf.raw_ops.Bucketize(input=x, boundaries=self.boundaries)
+
+  def get_config(self,):
+    config = {'boundaries': self.boundaries}
+    base_config = super(Bucketize, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
+
 class ChannelEmbeddingLayers(tf.keras.layers.Layer):
 
   def __init__(self,