Skip to content

Commit

Permalink
add hadoop
Browse files Browse the repository at this point in the history
  • Loading branch information
luliyucoordinate committed Jul 2, 2022
1 parent 48993ea commit 8e262b0
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 1 deletion.
10 changes: 10 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ http_archive(
url = "https://github.com/sewenew/redis-plus-plus/archive/refs/tags/1.2.3.zip",
)

# Declares the external repository "hadoop" from the Apache Hadoop 3.3.0
# release tarball on GitHub. The download is pinned by the sha256 below.
http_archive(
name = "hadoop",
# BUILD file from this repo that is applied to the extracted sources.
build_file = "//third_party:hadoop.BUILD",
sha256 = "fa9d0587d06c36838e778081bcf8271a9c63060af00b3bf456423c1777a62043",
# Top-level directory of the tarball, stripped from every extracted path.
strip_prefix = "hadoop-rel-release-3.3.0",
urls = [
"https://github.com/apache/hadoop/archive/refs/tags/rel/release-3.3.0.tar.gz",
],
)

# Calls tf_configure to create the repository named "local_config_tf".
# NOTE(review): tf_configure is loaded outside this view; presumably it
# locates the locally installed TensorFlow - confirm against its definition.
tf_configure(
name = "local_config_tf",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/lib/io/buffered_inputstream.h"
#include "tensorflow/core/lib/io/random_inputstream.h"
#include "tensorflow/core/platform/hadoop/hadoop_file_system.h"
#include "tensorflow_recommenders_addons/dynamic_embedding/core/lib/cuckoo/cuckoohash_map.hh"
#include "tensorflow_recommenders_addons/dynamic_embedding/core/utils/hadoop_file_system.h"
#include "tensorflow_recommenders_addons/dynamic_embedding/core/utils/types.h"

namespace tensorflow {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
#define TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_

#include <map>

#include "hdfs/hdfs.h"
#include "tensorflow/core/platform/env.h"

// Opaque libhdfs file-system handle. The elaborated `struct hdfs_internal`
// both introduces the incomplete type and names it, so only pointers to it
// can be used from this header.
extern "C" {
typedef struct hdfs_internal* hdfsFS;
}

namespace tensorflow {

// Forward declaration only; LibHDFS is not otherwise referenced in this header.
class LibHDFS;

class HadoopFileSystem : public FileSystem {
public:
HadoopFileSystem();
~HadoopFileSystem();

TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT;

Status NewRandomAccessFile(
const string& fname, TransactionToken* token,
std::unique_ptr<RandomAccessFile>* result) override;

Status NewWritableFile(const string& fname, TransactionToken* token,
std::unique_ptr<WritableFile>* result) override;

Status NewAppendableFile(const string& fname, TransactionToken* token,
std::unique_ptr<WritableFile>* result) override;

Status NewReadOnlyMemoryRegionFromFile(
const string& fname, TransactionToken* token,
std::unique_ptr<ReadOnlyMemoryRegion>* result) override;

Status FileExists(const string& fname, TransactionToken* token) override;

Status GetChildren(const string& dir, TransactionToken* token,
std::vector<string>* result) override;

Status GetMatchingPaths(const string& pattern, TransactionToken* token,
std::vector<string>* results) override;

Status DeleteFile(const string& fname, TransactionToken* token) override;

Status CreateDir(const string& dir, TransactionToken* token) override;

Status DeleteDir(const string& dir, TransactionToken* token) override;

Status GetFileSize(const string& fname, TransactionToken* token,
uint64* size) override;

Status RenameFile(const string& src, const string& target,
TransactionToken* token) override;

Status Stat(const string& fname, TransactionToken* token,
FileStatistics* stat) override;

string TranslateName(const string& name) const override;

private:
mutex mu_;
std::map<std::string, hdfsFS> connectionCache_ TF_GUARDED_BY(mu_);
Status Connect(StringPiece fname, hdfsFS* fs);
};

// Free function that takes both `path` and `nn` by non-const reference and
// returns a Status, so either argument may be modified by the call.
// NOTE(review): presumably splits an archive-style path into a name-node part
// (`nn`) and the remaining path - confirm against the definition, which is
// not in this header.
Status SplitArchiveNameAndPath(StringPiece& path, string& nn);

} // namespace tensorflow

#endif // TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
9 changes: 9 additions & 0 deletions third_party/hadoop.BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Makes every target in this package publicly visible unless it overrides it.
package(default_visibility = ["//visibility:public"])

# Header-only target exposing the libhdfs C header (hdfs.h) from the Hadoop
# source tree. No srcs are listed, so no libhdfs implementation is built here.
cc_library(
name = "hadoop",
hdrs = ["hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h"],
# No extra compiler options.
copts = [],
# Puts the libhdfs include directory on the include path, so dependents can
# write #include "hdfs/hdfs.h".
includes = ["hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include"],
# Same value as the package-level default_visibility.
visibility = ["//visibility:public"],
)

0 comments on commit 8e262b0

Please sign in to comment.