diff --git a/WORKSPACE b/WORKSPACE
index 7df12b577..dc59b6b7c 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -54,6 +54,18 @@ http_archive(
     url = "https://github.com/sewenew/redis-plus-plus/archive/refs/tags/1.2.3.zip",
 )
 
+# Apache Hadoop source tree. //third_party:hadoop.BUILD exposes only the
+# libhdfs C header (hdfs/hdfs.h) from it.
+http_archive(
+    name = "hadoop",
+    build_file = "//third_party:hadoop.BUILD",
+    sha256 = "fa9d0587d06c36838e778081bcf8271a9c63060af00b3bf456423c1777a62043",
+    strip_prefix = "hadoop-rel-release-3.3.0",
+    urls = [
+        "https://github.com/apache/hadoop/archive/refs/tags/rel/release-3.3.0.tar.gz",
+    ],
+)
+
 tf_configure(
     name = "local_config_tf",
 )
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
index d38f09a7c..045ccfabe 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
@@ -29,8 +29,8 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/io/buffered_inputstream.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
-#include "tensorflow/core/platform/hadoop/hadoop_file_system.h"
 #include "tensorflow_recommenders_addons/dynamic_embedding/core/lib/cuckoo/cuckoohash_map.hh"
+#include "tensorflow_recommenders_addons/dynamic_embedding/core/utils/hadoop_file_system.h"
 #include "tensorflow_recommenders_addons/dynamic_embedding/core/utils/types.h"
 
 namespace tensorflow {
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/utils/hadoop_file_system.h b/tensorflow_recommenders_addons/dynamic_embedding/core/utils/hadoop_file_system.h
new file mode 100644
index 000000000..7444a3b7d
--- /dev/null
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/utils/hadoop_file_system.h
@@ -0,0 +1,97 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
+#define TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "hdfs/hdfs.h"
+#include "tensorflow/core/platform/env.h"
+
+extern "C" {
+struct hdfs_internal;
+typedef hdfs_internal* hdfsFS;
+}
+
+namespace tensorflow {
+
+class LibHDFS;
+
+// FileSystem implementation whose connections are libhdfs `hdfsFS` handles.
+// This header only declares the interface; no definitions appear here.
+class HadoopFileSystem : public FileSystem {
+ public:
+  HadoopFileSystem();
+  ~HadoopFileSystem();
+
+  // NOTE(review): presumably re-exports the token-less FileSystem overloads
+  // hidden by the overrides below -- confirm against file_system.h.
+  TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT;
+
+  Status NewRandomAccessFile(
+      const string& fname, TransactionToken* token,
+      std::unique_ptr<RandomAccessFile>* result) override;
+
+  Status NewWritableFile(const string& fname, TransactionToken* token,
+                         std::unique_ptr<WritableFile>* result) override;
+
+  Status NewAppendableFile(const string& fname, TransactionToken* token,
+                           std::unique_ptr<WritableFile>* result) override;
+
+  Status NewReadOnlyMemoryRegionFromFile(
+      const string& fname, TransactionToken* token,
+      std::unique_ptr<ReadOnlyMemoryRegion>* result) override;
+
+  Status FileExists(const string& fname, TransactionToken* token) override;
+
+  Status GetChildren(const string& dir, TransactionToken* token,
+                     std::vector<string>* result) override;
+
+  Status GetMatchingPaths(const string& pattern, TransactionToken* token,
+                          std::vector<string>* results) override;
+
+  Status DeleteFile(const string& fname, TransactionToken* token) override;
+
+  Status CreateDir(const string& dir, TransactionToken* token) override;
+
+  Status DeleteDir(const string& dir, TransactionToken* token) override;
+
+  Status GetFileSize(const string& fname, TransactionToken* token,
+                     uint64* size) override;
+
+  Status RenameFile(const string& src, const string& target,
+                    TransactionToken* token) override;
+
+  Status Stat(const string& fname, TransactionToken* token,
+              FileStatistics* stat) override;
+
+  string TranslateName(const string& name) const override;
+
+ private:
+  mutex mu_;
+  // Connection cache, guarded by mu_.
+  // NOTE(review): key type assumed to be string -- confirm against the .cc.
+  std::map<string, hdfsFS> connectionCache_ TF_GUARDED_BY(mu_);
+  Status Connect(StringPiece fname, hdfsFS* fs);
+};
+
+Status SplitArchiveNameAndPath(StringPiece& path, string& nn);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
diff --git a/third_party/hadoop.BUILD b/third_party/hadoop.BUILD
new file mode 100644
index 000000000..d62e2d573
--- /dev/null
+++ b/third_party/hadoop.BUILD
@@ -0,0 +1,11 @@
+package(default_visibility = ["//visibility:public"])
+
+# Header-only target: exposes libhdfs' public C header so that
+# `#include "hdfs/hdfs.h"` resolves for dependents.
+cc_library(
+    name = "hadoop",
+    hdrs = ["hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h"],
+    copts = [],
+    includes = ["hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include"],
+    visibility = ["//visibility:public"],
+)