diff --git a/paddle/fluid/framework/data_feed.cu b/paddle/fluid/framework/data_feed.cu index 43dd09f2f4bad1..16161f20078216 100644 --- a/paddle/fluid/framework/data_feed.cu +++ b/paddle/fluid/framework/data_feed.cu @@ -1540,40 +1540,6 @@ int GraphDataGenerator::FillSlotFeature(uint64_t *d_walk, size_t key_num) { return 0; } -int GraphDataGenerator::FillFeatureBuf(uint64_t *d_walk, - uint64_t *d_feature, - size_t key_num) { - platform::CUDADeviceGuard guard(gpuid_); - - auto gpu_graph_ptr = GraphGpuWrapper::GetInstance(); - int ret = gpu_graph_ptr->get_feature_of_nodes( - gpuid_, - d_walk, - d_feature, - key_num, - slot_num_, - reinterpret_cast(d_slot_feature_num_map_->ptr()), - fea_num_per_node_); - return ret; -} - -int GraphDataGenerator::FillFeatureBuf( - std::shared_ptr d_walk, - std::shared_ptr d_feature) { - platform::CUDADeviceGuard guard(gpuid_); - - auto gpu_graph_ptr = GraphGpuWrapper::GetInstance(); - int ret = gpu_graph_ptr->get_feature_of_nodes( - gpuid_, - reinterpret_cast(d_walk->ptr()), - reinterpret_cast(d_feature->ptr()), - buf_size_, - slot_num_, - reinterpret_cast(d_slot_feature_num_map_->ptr()), - fea_num_per_node_); - return ret; -} - // 对于deepwalk模式,尝试插入table,0表示插入成功,1表示插入失败; // 对于sage模式,尝试插入table,table数量不够则清空table重新插入,返回值无影响。 int GraphDataGenerator::InsertTable( diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h index 224415ceb481ff..ad45fc244acecb 100644 --- a/paddle/fluid/framework/data_feed.h +++ b/paddle/fluid/framework/data_feed.h @@ -943,9 +943,6 @@ class GraphDataGenerator { int FillInferBuf(); void DoWalkandSage(); int FillSlotFeature(uint64_t* d_walk); - int FillFeatureBuf(uint64_t* d_walk, uint64_t* d_feature, size_t key_num); - int FillFeatureBuf(std::shared_ptr d_walk, - std::shared_ptr d_feature); void FillOneStep(uint64_t* start_ids, int etype_id, uint64_t* walk, diff --git a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h index 
fbdcb181e0f285..eb2d87f8c566a4
--- a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
+++ b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
@@ -134,6 +134,77 @@ inline void debug_gpu_memory_info(const char* desc) {
             << "desc=" << desc;
   }
 }
+
+// Logs the available and total memory of every visible CUDA device, in GiB,
+// prefixing each line with `desc`. A null `desc` is logged as an empty tag.
+// Enforces (via PADDLE_ENFORCE_EQ) that each CUDA runtime call succeeds.
+inline void show_gpu_mem(const char* desc) {
+  // Streaming a null C string is undefined behavior, so substitute "".
+  const char* tag = (desc != nullptr) ? desc : "";
+
+  // NOTE(review): presumably restores the caller's current device on scope
+  // exit; CudaDeviceRestorer is defined outside this hunk -- confirm.
+  CudaDeviceRestorer r;
+
+  int device_num = 0;
+  auto err = cudaGetDeviceCount(&device_num);
+  PADDLE_ENFORCE_EQ(
+      err,
+      cudaSuccess,
+      platform::errors::InvalidArgument("cudaGetDeviceCount failed!"));
+
+  size_t avail{0};
+  size_t total{0};
+  for (int i = 0; i < device_num; ++i) {
+    // cudaMemGetInfo reports on the current device, so the switch must
+    // succeed; otherwise the numbers would be attributed to the wrong GPU.
+    err = cudaSetDevice(i);
+    PADDLE_ENFORCE_EQ(
+        err,
+        cudaSuccess,
+        platform::errors::InvalidArgument("cudaSetDevice failed!"));
+    err = cudaMemGetInfo(&avail, &total);
+    PADDLE_ENFORCE_EQ(
+        err,
+        cudaSuccess,
+        platform::errors::InvalidArgument("cudaMemGetInfo failed!"));
+    VLOG(0) << "[" << tag << "] hbm on device " << i << ", "
+            << "avail=" << avail / 1024.0 / 1024.0 / 1024.0 << "g, "
+            << "total=" << total / 1024.0 / 1024.0 / 1024.0 << "g";
+  }
+}
+
+// Logs the resident memory of the current process in MB (10^6 bytes),
+// prefixing the line with `desc`. A null `desc` is logged as an empty tag.
+// The value comes from the second field of /proc/self/statm, which is a page
+// count; if the file cannot be opened or parsed, a failure line is logged
+// instead of a misleading "0MB".
+inline void show_cpu_mem(const char* desc) {
+  // Streaming a null C string is undefined behavior, so substitute "".
+  const char* tag = (desc != nullptr) ? desc : "";
+
+  // NOTE(review): assumes 4 KiB pages; confirm for targets with larger pages.
+  constexpr long kPageBytes = 4096;
+  constexpr long kBytesPerMB = 1000000;
+
+  long resident_pages = 0;
+  bool parsed = false;
+  FILE* fp = fopen("/proc/self/statm", "r");
+  if (fp != NULL) {
+    // "%*ld" skips the first field (total program size); only the resident
+    // set size is reported.
+    parsed = (fscanf(fp, "%*ld %ld", &resident_pages) == 1);
+    fclose(fp);
+  }
+
+  if (!parsed) {
+    VLOG(0) << "[" << tag << "] failed to read /proc/self/statm";
+    return;
+  }
+
+  VLOG(0) << "[" << tag << "] mem used "
+          << resident_pages * kPageBytes / kBytesPerMB << "MB";
+}
 
 };  // namespace framework
 };  // namespace paddle
diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
index c21cf26b59358c..e5c4272466f9a3 100644
--- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
+++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
@@ -752,6 +752,13 @@ void GraphGpuWrapper::finalize() {
   reinterpret_cast(graph_table)->show_table_collisions();
 }
 
+// Logs host (process resident) and per-device GPU memory usage, each line
+// tagged with `msg`, by delegating to show_cpu_mem and show_gpu_mem.
+void GraphGpuWrapper::show_mem(const char* msg) {
+  show_cpu_mem(msg);
+  show_gpu_mem(msg);
+}
+
 // edge table
 void GraphGpuWrapper::upload_batch(int table_type,
                                    int
slice_num, diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h index d1c4e91a8fbb3e..7b8ae2e1a1b8bf 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h +++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h @@ -198,6 +198,7 @@ class GraphGpuWrapper { std::unordered_map& get_graph_type_to_index(); std::string& get_node_type_size(std::string first_node_type); std::string& get_edge_type_size(); + void show_mem(const char* msg); std::unordered_map edge_to_id, node_to_id; std::vector id_to_feature, id_to_edge; diff --git a/paddle/fluid/pybind/fleet_py.cc b/paddle/fluid/pybind/fleet_py.cc index c25aa49b42acc7..e33b66a6dae335 100644 --- a/paddle/fluid/pybind/fleet_py.cc +++ b/paddle/fluid/pybind/fleet_py.cc @@ -389,6 +389,7 @@ void BindGraphGpuWrapper(py::module* m) { bool>( &GraphGpuWrapper::load_edge_file)) .def("load_node_and_edge", &GraphGpuWrapper::load_node_and_edge) + .def("show_mem", &GraphGpuWrapper::show_mem) .def("upload_batch", py::overload_cast( &GraphGpuWrapper::upload_batch))