From 67d7ee9da6b1c41c95992f82dd6c30dd5a4bea27 Mon Sep 17 00:00:00 2001 From: huwei02 <53012141+huwei02@users.noreply.github.com> Date: Sat, 1 Apr 2023 14:29:54 +0800 Subject: [PATCH] add show_mem (#249) Co-authored-by: root --- paddle/fluid/framework/data_feed.cu | 34 ---------------- paddle/fluid/framework/data_feed.h | 3 -- .../fleet/heter_ps/gpu_graph_utils.h | 40 +++++++++++++++++++ .../fleet/heter_ps/graph_gpu_wrapper.cu | 6 +++ .../fleet/heter_ps/graph_gpu_wrapper.h | 1 + paddle/fluid/pybind/fleet_py.cc | 1 + 6 files changed, 48 insertions(+), 37 deletions(-) diff --git a/paddle/fluid/framework/data_feed.cu b/paddle/fluid/framework/data_feed.cu index 84563127cc7d4..173ea03ee7c39 100644 --- a/paddle/fluid/framework/data_feed.cu +++ b/paddle/fluid/framework/data_feed.cu @@ -1545,40 +1545,6 @@ int GraphDataGenerator::FillSlotFeature(uint64_t *d_walk, size_t key_num) { return 0; } -int GraphDataGenerator::FillFeatureBuf(uint64_t *d_walk, - uint64_t *d_feature, - size_t key_num) { - platform::CUDADeviceGuard guard(gpuid_); - - auto gpu_graph_ptr = GraphGpuWrapper::GetInstance(); - int ret = gpu_graph_ptr->get_feature_of_nodes( - gpuid_, - d_walk, - d_feature, - key_num, - slot_num_, - reinterpret_cast(d_slot_feature_num_map_->ptr()), - fea_num_per_node_); - return ret; -} - -int GraphDataGenerator::FillFeatureBuf( - std::shared_ptr d_walk, - std::shared_ptr d_feature) { - platform::CUDADeviceGuard guard(gpuid_); - - auto gpu_graph_ptr = GraphGpuWrapper::GetInstance(); - int ret = gpu_graph_ptr->get_feature_of_nodes( - gpuid_, - reinterpret_cast(d_walk->ptr()), - reinterpret_cast(d_feature->ptr()), - buf_size_, - slot_num_, - reinterpret_cast(d_slot_feature_num_map_->ptr()), - fea_num_per_node_); - return ret; -} - // 对于deepwalk模式,尝试插入table,0表示插入成功,1表示插入失败; // 对于sage模式,尝试插入table,table数量不够则清空table重新插入,返回值无影响。 int GraphDataGenerator::InsertTable( diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h index 05907bead5940..fbf8657353e02 
100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -907,9 +907,6 @@ class GraphDataGenerator {
   int FillInferBuf();
   void DoWalkandSage();
   int FillSlotFeature(uint64_t* d_walk);
-  int FillFeatureBuf(uint64_t* d_walk, uint64_t* d_feature, size_t key_num);
-  int FillFeatureBuf(std::shared_ptr d_walk,
-                     std::shared_ptr d_feature);
   void FillOneStep(uint64_t* start_ids,
                    int etype_id,
                    uint64_t* walk,
diff --git a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
index 655e3c2a5cf0d..d746f3f89eaf8 100644
--- a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
+++ b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
@@ -133,6 +133,76 @@ inline void debug_gpu_memory_info(const char* desc) {
             << "desc=" << desc;
   }
 }
+
+// Logs the free and total memory of every visible CUDA device at VLOG(0),
+// prefixing each line with `desc`.
+//
+// The loop changes the current device with cudaSetDevice(); the
+// CudaDeviceRestorer declared first is presumably what switches back to the
+// caller's device on return (its definition is not in this hunk -- confirm).
+// A failing CUDA call is reported through PADDLE_ENFORCE_EQ.
+inline void show_gpu_mem(const char* desc) {
+  CudaDeviceRestorer r;
+
+  int device_num = 0;
+  auto count_err = cudaGetDeviceCount(&device_num);
+  PADDLE_ENFORCE_EQ(
+      count_err,
+      cudaSuccess,
+      platform::errors::InvalidArgument("cudaGetDeviceCount failed!"));
+
+  for (int i = 0; i < device_num; ++i) {
+    // cudaMemGetInfo() reports on the current device, so an unnoticed failed
+    // switch would log some other device's numbers under index i.
+    auto set_err = cudaSetDevice(i);
+    PADDLE_ENFORCE_EQ(
+        set_err,
+        cudaSuccess,
+        platform::errors::InvalidArgument("cudaSetDevice failed!"));
+
+    size_t avail = 0;
+    size_t total = 0;
+    auto info_err = cudaMemGetInfo(&avail, &total);
+    PADDLE_ENFORCE_EQ(
+        info_err,
+        cudaSuccess,
+        platform::errors::InvalidArgument("cudaMemGetInfo failed!"));
+    VLOG(0) << "[" << desc << "] hbm on device " << i << ", "
+            << "avail=" << avail / 1024.0 / 1024.0 / 1024.0 << "g, "
+            << "total=" << total / 1024.0 / 1024.0 / 1024.0 << "g";
+  }
+}
+
+// Logs this process's resident set size at VLOG(0), prefixed with `desc`.
+//
+// The size is taken from the "VmRSS" line of /proc/self/status, which is
+// given in kB (1024-byte units), so no page size has to be assumed. It is
+// printed in decimal megabytes (bytes / 1000000). When the file or the field
+// cannot be read, that is logged instead of a size.
+inline void show_cpu_mem(const char* desc) {
+  long long rss_kb = -1;  // stays negative until VmRSS has been parsed
+
+  FILE* fp = fopen("/proc/self/status", "r");
+  if (fp != nullptr) {
+    char line[256];
+    while (fgets(line, sizeof(line), fp) != nullptr) {
+      // The blank in the format matches any run of whitespace after the colon.
+      if (sscanf(line, "VmRSS: %lld", &rss_kb) == 1) {
+        break;
+      }
+    }
+    fclose(fp);
+  }
+
+  if (rss_kb < 0) {
+    VLOG(0) << "[" << desc << "] mem used unavailable";
+    return;
+  }
+
+  // long long keeps kB * 1024 from overflowing where long is 32 bits wide.
+  const long long resident_mem = rss_kb * 1024 / 1000000;
+  VLOG(0) << "[" << desc << "] mem used " << resident_mem << "MB";
+}
 };  // namespace framework
 };  // namespace paddle
 
diff --git
a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
index 424be56edb3b6..905bca753fd2e 100644
--- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
+++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
@@ -752,6 +752,13 @@ void GraphGpuWrapper::finalize() {
   reinterpret_cast(graph_table)->show_table_collisions();
 }
 
+// Logs the process's host memory usage and then each GPU's memory usage,
+// tagging every line with `msg`.
+void GraphGpuWrapper::show_mem(const char* msg) {
+  show_cpu_mem(msg);
+  show_gpu_mem(msg);
+}
+
 // edge table
 void GraphGpuWrapper::upload_batch(int table_type,
                                    int slice_num,
diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
index d1c4e91a8fbb3..7b8ae2e1a1b8b 100644
--- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
+++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
@@ -198,6 +198,7 @@ class GraphGpuWrapper {
   std::unordered_map& get_graph_type_to_index();
   std::string& get_node_type_size(std::string first_node_type);
   std::string& get_edge_type_size();
+  void show_mem(const char* msg);
 
   std::unordered_map edge_to_id, node_to_id;
   std::vector id_to_feature, id_to_edge;
diff --git a/paddle/fluid/pybind/fleet_py.cc b/paddle/fluid/pybind/fleet_py.cc
index 06d17f8f5d92c..8a902547f782a 100644
--- a/paddle/fluid/pybind/fleet_py.cc
+++ b/paddle/fluid/pybind/fleet_py.cc
@@ -389,6 +389,7 @@ void BindGraphGpuWrapper(py::module* m) {
                              bool>(
                &GraphGpuWrapper::load_edge_file))
       .def("load_node_and_edge", &GraphGpuWrapper::load_node_and_edge)
+      .def("show_mem", &GraphGpuWrapper::show_mem)
       .def("upload_batch",
            py::overload_cast(
                &GraphGpuWrapper::upload_batch))