tensorflow · rhdong · May 22, 2024 · May 20, 2024 · MoFHeka · May 22, 2024
diff --git a/README.md b/README.md
@@ -174,19 +174,27 @@ Only `TF_NEED_CUDA=1` is required and other environment variables are optional:
 export TF_VERSION="2.15.1"  # "2.11.0" is well tested.
 export PY_VERSION="3.9" 
 export TF_NEED_CUDA=1
-export TF_CUDA_VERSION=12.2
-export TF_CUDNN_VERSION=8.9
+export TF_CUDA_VERSION=12.2 # check with: nvcc --version
+export TF_CUDNN_VERSION=8.9 # check in Python with: print("cuDNN version:", tf.sysconfig.get_build_info()["cudnn_version"])
 export CUDA_TOOLKIT_PATH="/usr/local/cuda"
 export CUDNN_INSTALL_PATH="/usr/lib/x86_64-linux-gnu"
 
 python configure.py
 ```
 And then build the pip package and install:
-```sh`
+```sh
 bazel build --enable_runfiles build_pip_pkg
-bazel-bin/build_pip_pkg artifacts`
+bazel-bin/build_pip_pkg artifacts
 pip install artifacts/tensorflow_recommenders_addons_gpu-*.whl
 ```
+To run the unit tests:
+```sh
+cp -f ./bazel-bin/tensorflow_recommenders_addons/dynamic_embedding/core/*.so ./tensorflow_recommenders_addons/dynamic_embedding/core/
+pip install pytest
+python tensorflow_recommenders_addons/tests/run_all_test.py
+# and run an individual test file with pytest, for example:
+pytest -s tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py
+```
 
 #### Apple Silicon Support
 Requirements:

diff --git a/WORKSPACE b/WORKSPACE
@@ -57,9 +57,9 @@ http_archive(
 http_archive(
     name = "hkv",
     build_file = "//build_deps/toolchains/hkv:hkv.BUILD",
-    sha256 = "79c59b19c03b771cdcb6deb3c6a3213353482f4d07cb1ddb53c4b001a0f58b29",
-    strip_prefix = "HierarchicalKV-0.1.0-beta.10",
-    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.10.tar.gz",
+    sha256 = "0535477e7cd82affa940468647c07caacd54d42a2c775cfdf199b3aa6e4f87a6",
+    strip_prefix = "HierarchicalKV-0.1.0-beta.11",
+    url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.11.tar.gz",
 )
 
 tf_configure(

diff --git a/docs/api_docs/tfra/dynamic_embedding/HkvHashTableConfig.md b/docs/api_docs/tfra/dynamic_embedding/HkvHashTableConfig.md
@@ -57,10 +57,13 @@ HkvHashTableConfig contains three parameters to configure the HashTable, They al
 * <b>`init_capacity`</b>: The initial capacity of the hash table.
 * <b>`max_capacity`</b>: The maximum capacity of the hash table.
 * <b>`max_hbm_for_values`</b>: The maximum HBM for values, in bytes.
+* <b>`reserved_key_start_bit`</b>: The start bit of the HKV [Reserved Keys](https://github.com/NVIDIA-Merlin/HierarchicalKV?tab=readme-ov-file#reserved-keys).
+Defaults to 0.
 
 #### Configuration Suggestion
 
 * <b>`Pure HBM mode`</b>: set the max_hbm_for_values >= sizeof(V) * dim * max_capacity
 * <b>`HBM + HMEM mode`</b>: set the max_hbm_for_values < sizeof(V) * dim * max_capacity
 * <b>`Pure HMEM mode`</b>: set the max_hbm_for_values = 0
 * if max_capacity == init_capacity, the HBM + HMEM consumption = sizeof(V) * dim * max_capacity
+* <b>`reserved_key_start_bit`</b>: If you do not use 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, or 0xFFFFFFFFFFFFFFFF as keys, you do not need to change it.
@@ -98,6 +98,10 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface {
     options.max_capacity = static_cast<size_t>(max_capacity_i64);
     options.max_hbm_for_vectors = static_cast<size_t>(max_hbm_for_vectors_i64);
     options.step_per_epoch = step_per_epoch;
+    int reserved_key_start_bit = 0;
+    OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "reserved_key_start_bit",
+                                    &reserved_key_start_bit));
+    options.reserved_key_start_bit = reserved_key_start_bit;
 
     if (options.max_capacity == 0) {
       char* env_max_capacity_str =

@@ -306,6 +306,7 @@ struct TableWrapperInitOptions {
   size_t max_hbm_for_vectors;
   size_t max_bucket_size;
   int64_t step_per_epoch;
+  int reserved_key_start_bit;
 
   float max_load_factor;
   int block_size;
@@ -470,6 +471,7 @@ class TableWrapper {
         break;
     }
     step_per_epoch_ = init_options.step_per_epoch;
+    mkv_options_.reserved_key_start_bit = init_options.reserved_key_start_bit;
     curr_epoch_ = 0;
     curr_step_ = 1;
 

@@ -308,6 +308,7 @@ REGISTER_OP(PREFIX_OP_NAME(HkvHashTableOfTensors))
     .Attr("max_hbm_for_vectors: int = 0")
     .Attr("step_per_epoch: int = 0")
     .Attr("strategy: int = 0")
+    .Attr("reserved_key_start_bit: int = 0")
     .SetIsStateful()
     .SetShapeFn([](InferenceContext* c) {
       PartialTensorShape value_p;

@@ -455,7 +455,8 @@ def test_evict_strategy_epoch_lru(self):
                                            max_hbm_for_values=1024 * 64,
                                            evict_strategy=strategy,
                                            step_per_epoch=1,
-                                           gen_scores_fn=gen_scores_fn)))
+                                           gen_scores_fn=gen_scores_fn,
+                                           reserved_key_start_bit=1)))
 
       base_epoch_lfu_scores_list = [1, 1 + (1 << 32), 1 + (2 << 32)]
 

@@ -91,7 +91,7 @@ def test_basic(self):
               dim=8,
               init_size=1024,
               kv_creator=de.HkvHashTableCreator(config=de.HkvHashTableConfig(
-                  max_capacity=99999)))
+                  max_capacity=99999, reserved_key_start_bit=1)))
           self.assertTrue(dev_str in printed.contents())
 
           self.evaluate(table.size())
@@ -122,7 +122,7 @@ def _convert(v, t):
       for device, use_gpu in test_list:
         with self.session(config=default_config, use_gpu=use_gpu) as sess:
           keys = constant_op.constant(
-              np.array([0, 1, 2, 3]).astype(_type_converter(key_dtype)),
+              np.array([0, 1, 2, -1]).astype(_type_converter(key_dtype)),
               key_dtype)
           values = constant_op.constant(
               _convert([[0] * dim, [1] * dim, [2] * dim, [3] * dim],
@@ -136,7 +136,7 @@ def _convert(v, t):
               dim=dim,
               init_size=1024,
               kv_creator=de.HkvHashTableCreator(config=de.HkvHashTableConfig(
-                  max_capacity=99999)))
+                  max_capacity=99999, reserved_key_start_bit=1)))
           table.clear()
           id += 1
 
@@ -170,7 +170,7 @@ def _convert(v, t):
           # exported data is in the order of the internal map, i.e. undefined
           sorted_keys = np.sort(self.evaluate(exported_keys))
           sorted_values = np.sort(self.evaluate(exported_values), axis=0)
-          self.assertAllEqual(_convert([0, 2, 3], key_dtype),
+          self.assertAllEqual(_convert([-1, 0, 2], key_dtype),
                               _convert(sorted_keys, key_dtype))
           self.assertAllEqual(
               _convert([[0] * dim, [2] * dim, [3] * dim], value_dtype),

@@ -156,6 +156,7 @@ def __init__(
       evict_strategy=HkvEvictStrategy.LRU,
       step_per_epoch=0,
       gen_scores_fn=None,
+      reserved_key_start_bit=0,
   ):
     """ CuckooHashTableConfig include nothing for parameter default satisfied.
     """
@@ -165,6 +166,7 @@ def __init__(
     self.evict_strategy = evict_strategy
     self.step_per_epoch = step_per_epoch
     self.gen_scores_fn = gen_scores_fn
+    self.reserved_key_start_bit = reserved_key_start_bit
 
 
 class HkvHashTableCreator(KVCreator):
@@ -192,13 +194,15 @@ def create(
     self.evict_strategy = HkvEvictStrategy.LRU
     self.step_per_epoch = 0
     self.gen_scores_fn = None
+    self.reserved_key_start_bit = 0
     if self.config and isinstance(self.config, de.HkvHashTableConfig):
       self.init_capacity = self.config.init_capacity
       self.max_capacity = self.config.max_capacity
       self.max_hbm_for_values = self.config.max_hbm_for_values
       self.evict_strategy = self.config.evict_strategy
       self.step_per_epoch = self.config.step_per_epoch
       self.gen_scores_fn = self.config.gen_scores_fn
+      self.reserved_key_start_bit = self.config.reserved_key_start_bit
     self.device = device
     self.shard_saveable_object_fn = shard_saveable_object_fn
 
@@ -216,7 +220,8 @@ def create(
         gen_scores_fn=self.gen_scores_fn,
         config=self.config,
         device=self.device,
-        shard_saveable_object_fn=self.shard_saveable_object_fn)
+        shard_saveable_object_fn=self.shard_saveable_object_fn,
+        reserved_key_start_bit=self.reserved_key_start_bit)
 
   def get_config(self):
     if not context.executing_eagerly():

@@ -79,6 +79,7 @@ def __init__(
       evict_strategy=HkvEvictStrategy.LRU,
       step_per_epoch=0,
       gen_scores_fn=None,
+      reserved_key_start_bit=0,
   ):
     """Creates an empty `HkvHashTable` object.
 
@@ -124,15 +125,15 @@ def __init__(
     self._step_per_epoch = step_per_epoch
     self._gen_scores_fn = gen_scores_fn
     self._default_scores = tf.constant([], dtypes.int64)
-
+    self._reserved_key_start_bit = reserved_key_start_bit
     if self._config:
       self._init_capacity = self._config.init_capacity
       self._max_capacity = self._config.max_capacity
       self._max_hbm_for_values = self._config.max_hbm_for_values
       self._evict_strategy = self._config.evict_strategy
       self._step_per_epoch = self._config.step_per_epoch
       self._gen_scores_fn = self._config.gen_scores_fn
-
+      self._reserved_key_start_bit = self._config.reserved_key_start_bit
     self._shared_name = None
     if context.executing_eagerly():
       # TODO(allenl): This will leak memory due to kernel caching by the
@@ -183,6 +184,7 @@ def _create_resource(self):
           strategy=self._evict_strategy.value,
           step_per_epoch=self._step_per_epoch,
           name=self._name,
+          reserved_key_start_bit=self._reserved_key_start_bit,
       )
 
     if context.executing_eagerly():