From 019270302f22404da6c067bc64c184f11e0b62a3 Mon Sep 17 00:00:00 2001 From: haozhihan Date: Fri, 14 Jun 2024 16:26:02 +0800 Subject: [PATCH 1/2] delete some useless files --- source/module_psi/kernels/cuda/memory_op.cu | 288 -------- source/module_psi/kernels/device.cpp | 687 ------------------ source/module_psi/kernels/device.h | 56 -- source/module_psi/kernels/memory_op.h | 234 ------ source/module_psi/kernels/psi_memory_op.cpp | 338 --------- .../module_psi/kernels/rocm/memory_op.hip.cu | 269 ------- source/module_psi/kernels/test/CMakeLists.txt | 5 - .../module_psi/kernels/test/device_test.cpp | 31 - .../module_psi/kernels/test/memory_test.cpp | 349 --------- source/module_psi/kernels/types.h | 16 - 10 files changed, 2273 deletions(-) delete mode 100644 source/module_psi/kernels/cuda/memory_op.cu delete mode 100644 source/module_psi/kernels/device.cpp delete mode 100644 source/module_psi/kernels/device.h delete mode 100644 source/module_psi/kernels/memory_op.h delete mode 100644 source/module_psi/kernels/psi_memory_op.cpp delete mode 100644 source/module_psi/kernels/rocm/memory_op.hip.cu delete mode 100644 source/module_psi/kernels/test/CMakeLists.txt delete mode 100644 source/module_psi/kernels/test/device_test.cpp delete mode 100644 source/module_psi/kernels/test/memory_test.cpp delete mode 100644 source/module_psi/kernels/types.h diff --git a/source/module_psi/kernels/cuda/memory_op.cu b/source/module_psi/kernels/cuda/memory_op.cu deleted file mode 100644 index 89b745528d..0000000000 --- a/source/module_psi/kernels/cuda/memory_op.cu +++ /dev/null @@ -1,288 +0,0 @@ -// #include "module_psi/kernels/memory_op.h" - -// #include -// #include - -// #include -// #include -// #include - -// #define THREADS_PER_BLOCK 256 - -// namespace psi { -// namespace memory { - -// template -// __global__ void cast_memory( -// FPTYPE_out* out, -// const FPTYPE_in* in, -// const int size) -// { -// int idx = blockIdx.x * blockDim.x + threadIdx.x; -// if(idx >= size) {return;} -// out[idx] = static_cast(in[idx]); -// } - -// template -// __global__ void cast_memory( -// std::complex* out, -// const std::complex* in, -// const int size) -// { -// int idx = blockIdx.x * blockDim.x + threadIdx.x; -// if(idx >= size) {return;} -// auto* _out = reinterpret_cast*>(out); -// const auto* _in = reinterpret_cast*>(in); -// _out[idx] = static_cast>(_in[idx]); -// } - -// template -// void resize_memory_op::operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE*& arr, -// const size_t size, -// const char* record_in) -// { -// if (arr != nullptr) { -// delete_memory_op()(dev, arr); -// } -// cudaErrcheck(cudaMalloc((void **)&arr, sizeof(FPTYPE) * size)); -// } - -// template -// void set_memory_op::operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE* arr, -// const int var, -// const size_t size) -// { -// cudaErrcheck(cudaMemset(arr, var, sizeof(FPTYPE) * size)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// cudaErrcheck(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyDeviceToHost)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// cudaErrcheck(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyHostToDevice)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// cudaErrcheck(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyDeviceToDevice)); -// } - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// if (size == 0) {return;} -// const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; -// cast_memory<<>>(arr_out, arr_in, size); - -// cudaErrcheck(cudaGetLastError()); -// cudaErrcheck(cudaDeviceSynchronize()); -// } -// }; - -// template -// struct cast_memory_op { -// void operator()(const psi::DEVICE_GPU* dev_out, -// const psi::DEVICE_CPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) { - -// if (size == 0) {return;} -// // No need to cast the memory if the data types are the same. -// if (std::is_same::value) -// { -// synchronize_memory_op()(dev_out, -// dev_in, -// arr_out, -// reinterpret_cast(arr_in), -// size); -// return; -// } -// FPTYPE_in * arr = nullptr; -// cudaErrcheck(cudaMalloc((void **)&arr, sizeof(FPTYPE_in) * size)); -// cudaErrcheck(cudaMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, cudaMemcpyHostToDevice)); -// const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; -// cast_memory<<>>(arr_out, arr, size); -// cudaErrcheck(cudaGetLastError()); -// cudaErrcheck(cudaDeviceSynchronize()); -// cudaErrcheck(cudaFree(arr)); -// } -// }; - -// template -// struct cast_memory_op { -// void operator()(const psi::DEVICE_CPU* dev_out, -// const psi::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) { -// if (size == 0) {return;} -// // No need to cast the memory if the data types are the same. -// if (std::is_same::value) -// { -// synchronize_memory_op()(dev_out, -// dev_in, -// arr_out, -// reinterpret_cast(arr_in), -// size); -// return; -// } -// auto * arr = (FPTYPE_in*) malloc(sizeof(FPTYPE_in) * size); -// cudaErrcheck(cudaMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, cudaMemcpyDeviceToHost)); -// for (int ii = 0; ii < size; ii++) { -// arr_out[ii] = static_cast(arr[ii]); -// } -// free(arr); -// } -// }; - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// auto * arr = (FPTYPE_in*) malloc(sizeof(FPTYPE_in) * size); -// cudaErrcheck(cudaMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, cudaMemcpyDeviceToHost)); -// for (int ii = 0; ii < size; ii++) { -// arr_out[ii] = static_cast(arr[ii]); -// } -// free(arr); -// } -// }; - -// template -// void delete_memory_op::operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) -// { -// cudaErrcheck(cudaFree(arr)); -// } - -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op, base_device::DEVICE_GPU>; -// template struct resize_memory_op, base_device::DEVICE_GPU>; - -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op, base_device::DEVICE_GPU>; -// template struct set_memory_op, base_device::DEVICE_GPU>; - -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; - -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; - -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op, base_device::DEVICE_GPU>; -// template struct delete_memory_op, base_device::DEVICE_GPU>; - -// } // end of namespace gpu_cuda -// } // end of namespace psi \ No newline at end of file diff --git a/source/module_psi/kernels/device.cpp b/source/module_psi/kernels/device.cpp deleted file mode 100644 index b2505388cb..0000000000 --- a/source/module_psi/kernels/device.cpp +++ /dev/null @@ -1,687 +0,0 @@ - -// #include -// #include -// #include "module_base/module_device/types.h" -// #include "module_base/tool_quit.h" - -// #include - -// #if defined(__CUDA) -// #include -// #endif - -// #if defined(__ROCM) -// #include -// #endif - -// #ifdef __MPI -// #include "mpi.h" -// #endif - -// namespace psi{ - -// namespace device{ - -// // static bool is_init = false; - -// // // functions used in custom ops -// // template <> -// // base_device::AbacusDevice_t get_device_type(const base_device::DEVICE_CPU* dev) -// // { -// // return base_device::CpuDevice; -// // } -// // template<> std::string get_current_precision(const double* var) { -// // return "double"; -// // } -// // template<> std::string get_current_precision (const std::complex * var) { -// // return "single"; -// // } -// // template<> std::string get_current_precision (const std::complex * var) { -// // return "double"; -// // } - -// #if ((defined __CUDA) || (defined __ROCM)) -// // template <> -// // base_device::AbacusDevice_t get_device_type(const base_device::DEVICE_GPU* dev) -// // { -// // return base_device::GpuDevice; -// // } - -// void set_device(const int rank) { -// #if defined (__CUDA) -// cudaErrcheck(cudaSetDevice(rank)); -// #elif defined (__ROCM) -// hipErrcheck(hipSetDevice(rank)); -// #endif -// } - -// int get_device_num() { -// int device_num = -1; -// #if defined (__CUDA) -// cudaErrcheck(cudaGetDeviceCount(&device_num)); -// #elif defined (__ROCM) -// hipErrcheck(hipGetDeviceCount(&device_num)); -// #endif -// return device_num; -// } -// #endif - -// // #if defined(__CUDA) -// // template <> -// // void print_device_info(const base_device::DEVICE_GPU* ctx, std::ofstream& ofs_device) -// // { -// // if (is_init) -// // { -// // return; -// // } -// // int deviceCount = 0; -// // cudaError_t error_id = cudaGetDeviceCount(&deviceCount); -// // if (error_id != cudaSuccess) -// // { -// // ofs_device << "cudaGetDeviceCount returned " << static_cast(error_id) << "\n-> " -// // << cudaGetErrorString(error_id) << std::endl; -// // ModuleBase::WARNING_QUIT("device", "GPU returned is without cudaSuccess"); -// // } -// // // This function call returns 0 if there are no CUDA capable devices. -// // if (deviceCount == 0) -// // { -// // ofs_device << "There are no available device(s) that support CUDA\n"; -// // } -// // else -// // { -// // ofs_device << "Detected " << deviceCount << " CUDA Capable device(s)\n"; -// // } -// // int dev = 0, driverVersion = 0, runtimeVersion = 0; -// // cudaErrcheck(cudaSetDevice(dev)); -// // cudaDeviceProp deviceProp; -// // cudaErrcheck(cudaGetDeviceProperties(&deviceProp, dev)); -// // ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; -// // // Console log -// // cudaErrcheck(cudaDriverGetVersion(&driverVersion)); -// // cudaErrcheck(cudaRuntimeGetVersion(&runtimeVersion)); -// // char msg[1024]; -// // sprintf(msg, -// // " CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", -// // driverVersion / 1000, (driverVersion % 100) / 10, -// // runtimeVersion / 1000, (runtimeVersion % 100) / 10); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " CUDA Capability Major/Minor version number: %d.%d\n", -// // deviceProp.major, deviceProp.minor); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " GPU Max Clock rate: %.0f MHz (%0.2f " -// // "GHz)\n", -// // deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); -// // ofs_device << msg << std::endl; -// // // This is supported in CUDA 5.0 (runtime API device properties) -// // sprintf(msg, -// // " Memory Clock rate: %.0f Mhz\n", -// // deviceProp.memoryClockRate * 1e-3f); -// // ofs_device << msg << std::endl; - -// // sprintf(msg, -// // " Memory Bus Width: %d-bit\n", -// // deviceProp.memoryBusWidth); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, " -// // "%d), 3D=(%d, %d, %d)\n", -// // deviceProp.maxTexture1D, deviceProp.maxTexture2D[0], -// // deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0], -// // deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); -// // ofs_device << msg << std::endl; - -// // sprintf(msg, -// // " Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", -// // deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d " -// // "layers\n", -// // deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], -// // deviceProp.maxTexture2DLayered[2]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total amount of constant memory: %zu bytes\n", -// // deviceProp.totalConstMem); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total amount of shared memory per block: %zu bytes\n", -// // deviceProp.sharedMemPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total shared memory per multiprocessor: %zu bytes\n", -// // deviceProp.sharedMemPerMultiprocessor); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total number of registers available per block: %d\n", -// // deviceProp.regsPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Warp size: %d\n", -// // deviceProp.warpSize); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum number of threads per multiprocessor: %d\n", -// // deviceProp.maxThreadsPerMultiProcessor); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum number of threads per block: %d\n", -// // deviceProp.maxThreadsPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", -// // deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], -// // deviceProp.maxThreadsDim[2]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", -// // deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], -// // deviceProp.maxGridSize[2]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum memory pitch: %zu bytes\n", -// // deviceProp.memPitch); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Texture alignment: %zu bytes\n", -// // deviceProp.textureAlignment); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " Concurrent copy and kernel execution: %s with %d copy " -// // "engine(s)\n", -// // (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Run time limit on kernels: %s\n", -// // deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Integrated GPU sharing Host Memory: %s\n", -// // deviceProp.integrated ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Support host page-locked memory mapping: %s\n", -// // deviceProp.canMapHostMemory ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Alignment requirement for Surfaces: %s\n", -// // deviceProp.surfaceAlignment ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device has ECC support: %s\n", -// // deviceProp.ECCEnabled ? "Enabled" : "Disabled"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device supports Unified Addressing (UVA): %s\n", -// // deviceProp.unifiedAddressing ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device supports Managed Memory: %s\n", -// // deviceProp.managedMemory ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device supports Compute Preemption: %s\n", -// // deviceProp.computePreemptionSupported ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Supports Cooperative Kernel Launch: %s\n", -// // deviceProp.cooperativeLaunch ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", -// // deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", -// // deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); -// // ofs_device << msg << std::endl; -// // const char *sComputeMode[] = { -// // "Default (multiple host threads can use ::cudaSetDevice() with device " -// // "simultaneously)", -// // "Exclusive (only one host thread in one process is able to use " -// // "::cudaSetDevice() with this device)", -// // "Prohibited (no host thread can use ::cudaSetDevice() with this " -// // "device)", -// // "Exclusive Process (many threads in one process is able to use " -// // "::cudaSetDevice() with this device)", -// // "Unknown", NULL}; -// // sprintf(msg, " Compute Mode:\n"); -// // ofs_device << msg << std::endl; -// // ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl << std::endl; - -// // // If there are 2 or more GPUs, query to determine whether RDMA is supported -// // if (deviceCount >= 2) { -// // cudaDeviceProp prop[64]; -// // int gpuid[64]; // we want to find the first two GPUs that can support P2P -// // int gpu_p2p_count = 0; - -// // for (int i = 0; i < deviceCount; i++) { -// // cudaErrcheck(cudaGetDeviceProperties(&prop[i], i)); - -// // // Only boards based on Fermi or later can support P2P -// // if (prop[i].major >= 2) { -// // // This is an array of P2P capable GPUs -// // gpuid[gpu_p2p_count++] = i; -// // } -// // } - -// // // Show all the combinations of support P2P GPUs -// // int can_access_peer; - -// // if (gpu_p2p_count >= 2) { -// // for (int i = 0; i < gpu_p2p_count; i++) { -// // for (int j = 0; j < gpu_p2p_count; j++) { -// // if (gpuid[i] == gpuid[j]) { -// // continue; -// // } -// // cudaErrcheck(cudaDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j])); -// // sprintf(msg, "> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", -// // prop[gpuid[i]].name, gpuid[i], prop[gpuid[j]].name, gpuid[j], -// // can_access_peer ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // } -// // } -// // } -// // } - -// // // csv masterlog info -// // // ***************************** -// // // exe and CUDA driver name -// // std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; -// // char cTemp[16]; - -// // // driver version -// // sProfileString += ", CUDA Driver Version = "; - -// // snprintf(cTemp, sizeof(cTemp), "%d.%d", driverVersion / 1000, -// // (driverVersion % 100) / 10); -// // sProfileString += cTemp; - -// // // Runtime version -// // sProfileString += ", CUDA Runtime Version = "; -// // snprintf(cTemp, sizeof(cTemp), "%d.%d", runtimeVersion / 1000, -// // (runtimeVersion % 100) / 10); -// // sProfileString += cTemp; - -// // // Device count -// // sProfileString += ", NumDevs = "; -// // snprintf(cTemp, sizeof(cTemp), "%d", deviceCount); -// // sProfileString += cTemp; -// // sProfileString += "\n"; - -// // ofs_device << sProfileString.c_str() << std::endl; -// // is_init = true; -// // ofs_device << "End of device informations." << std::endl << std::endl; -// // } - -// // template <> -// // void record_device_memory(const base_device::DEVICE_GPU* ctx, -// // std::ofstream& ofs_device, -// // std::string str, -// // size_t size) -// // { -// // ofs_device << "Allocate " << static_cast(size) / 8 / 1024 / 1024 << " \tMB device memory\t" -// // << "from " << str << std::endl -// // << std::endl; -// // } - -// // #elif defined(__ROCM) -// // template <> -// // void print_device_info(const base_device::DEVICE_GPU* ctx, std::ofstream& ofs_device) -// // { -// // if (is_init) -// // { -// // return; -// // } -// // int deviceCount = 0; -// // hipError_t error_id = hipGetDeviceCount(&deviceCount); -// // if (error_id != hipSuccess) -// // { -// // ofs_device << "hipGetDeviceCount returned " << static_cast(error_id) << "\n-> " -// // << hipGetErrorString(error_id) << std::endl; -// // ModuleBase::WARNING_QUIT("device", "GPU returned is without hipSuccess"); -// // } -// // // This function call returns 0 if there are no CUDA capable devices. -// // if (deviceCount == 0) -// // { -// // ofs_device << "There are no available device(s) that support CUDA\n"; -// // } -// // else -// // { -// // ofs_device << "Detected " << deviceCount << " CUDA Capable device(s)\n"; -// // } -// // int dev = 0, driverVersion = 0, runtimeVersion = 0; -// // hipErrcheck(hipSetDevice(dev)); -// // hipDeviceProp_t deviceProp; -// // hipErrcheck(hipGetDeviceProperties(&deviceProp, dev)); -// // ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; -// // // Console log -// // hipErrcheck(hipDriverGetVersion(&driverVersion)); -// // hipErrcheck(hipRuntimeGetVersion(&runtimeVersion)); -// // char msg[1024]; -// // sprintf(msg, -// // " CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", -// // driverVersion / 1000, (driverVersion % 100) / 10, -// // runtimeVersion / 1000, (runtimeVersion % 100) / 10); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " CUDA Capability Major/Minor version number: %d.%d\n", -// // deviceProp.major, deviceProp.minor); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " GPU Max Clock rate: %.0f MHz (%0.2f " -// // "GHz)\n", -// // deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); -// // ofs_device << msg << std::endl; -// // // This is supported in CUDA 5.0 (runtime API device properties) -// // sprintf(msg, -// // " Memory Clock rate: %.0f Mhz\n", -// // deviceProp.memoryClockRate * 1e-3f); -// // ofs_device << msg << std::endl; - -// // sprintf(msg, -// // " Memory Bus Width: %d-bit\n", -// // deviceProp.memoryBusWidth); -// // ofs_device << msg << std::endl; -// // sprintf(msg, -// // " Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, " -// // "%d), 3D=(%d, %d, %d)\n", -// // deviceProp.maxTexture1D, deviceProp.maxTexture2D[0], -// // deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0], -// // deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); -// // ofs_device << msg << std::endl; - -// // sprintf(msg, " Total amount of constant memory: %zu bytes\n", -// // deviceProp.totalConstMem); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total amount of shared memory per block: %zu bytes\n", -// // deviceProp.sharedMemPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Total number of registers available per block: %d\n", -// // deviceProp.regsPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Warp size: %d\n", -// // deviceProp.warpSize); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum number of threads per multiprocessor: %d\n", -// // deviceProp.maxThreadsPerMultiProcessor); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum number of threads per block: %d\n", -// // deviceProp.maxThreadsPerBlock); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", -// // deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], -// // deviceProp.maxThreadsDim[2]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", -// // deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], -// // deviceProp.maxGridSize[2]); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Maximum memory pitch: %zu bytes\n", -// // deviceProp.memPitch); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Texture alignment: %zu bytes\n", -// // deviceProp.textureAlignment); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Run time limit on kernels: %s\n", -// // deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Integrated GPU sharing Host Memory: %s\n", -// // deviceProp.integrated ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Support host page-locked memory mapping: %s\n", -// // deviceProp.canMapHostMemory ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device has ECC support: %s\n", -// // deviceProp.ECCEnabled ? "Enabled" : "Disabled"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device supports Managed Memory: %s\n", -// // deviceProp.managedMemory ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Supports Cooperative Kernel Launch: %s\n", -// // deviceProp.cooperativeLaunch ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", -// // deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // sprintf(msg, " Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", -// // deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); -// // ofs_device << msg << std::endl; -// // const char *sComputeMode[] = { -// // "Default (multiple host threads can use ::hipSetDevice() with device " -// // "simultaneously)", -// // "Exclusive (only one host thread in one process is able to use " -// // "::hipSetDevice() with this device)", -// // "Prohibited (no host thread can use ::hipSetDevice() with this " -// // "device)", -// // "Exclusive Process (many threads in one process is able to use " -// // "::hipSetDevice() with this device)", -// // "Unknown", NULL}; -// // sprintf(msg, " Compute Mode:\n"); -// // ofs_device << msg << std::endl; -// // ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl << std::endl; - -// // // If there are 2 or more GPUs, query to determine whether RDMA is supported -// // if (deviceCount >= 2) { -// // hipDeviceProp_t prop[64]; -// // int gpuid[64]; // we want to find the first two GPUs that can support P2P -// // int gpu_p2p_count = 0; - -// // for (int i = 0; i < deviceCount; i++) { -// // hipErrcheck(hipGetDeviceProperties(&prop[i], i)); - -// // // Only boards based on Fermi or later can support P2P -// // if (prop[i].major >= 2) { -// // // This is an array of P2P capable GPUs -// // gpuid[gpu_p2p_count++] = i; -// // } -// // } - -// // // Show all the combinations of support P2P GPUs -// // int can_access_peer; - -// // if (gpu_p2p_count >= 2) { -// // for (int i = 0; i < gpu_p2p_count; i++) { -// // for (int j = 0; j < gpu_p2p_count; j++) { -// // if (gpuid[i] == gpuid[j]) { -// // continue; -// // } -// // hipErrcheck(hipDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j])); -// // sprintf(msg, "> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", -// // prop[gpuid[i]].name, gpuid[i], prop[gpuid[j]].name, gpuid[j], -// // can_access_peer ? "Yes" : "No"); -// // ofs_device << msg << std::endl; -// // } -// // } -// // } -// // } - -// // // csv masterlog info -// // // ***************************** -// // // exe and CUDA driver name -// // std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; -// // char cTemp[16]; - -// // // driver version -// // sProfileString += ", CUDA Driver Version = "; - -// // snprintf(cTemp, sizeof(cTemp), "%d.%d", driverVersion / 1000, -// // (driverVersion % 100) / 10); -// // sProfileString += cTemp; - -// // // Runtime version -// // sProfileString += ", CUDA Runtime Version = "; -// // snprintf(cTemp, sizeof(cTemp), "%d.%d", runtimeVersion / 1000, -// // (runtimeVersion % 100) / 10); -// // sProfileString += cTemp; - -// // // Device count -// // sProfileString += ", NumDevs = "; -// // snprintf(cTemp, sizeof(cTemp), "%d", deviceCount); -// // sProfileString += cTemp; -// // sProfileString += "\n"; - -// // ofs_device << sProfileString.c_str() << std::endl; -// // is_init = true; -// // ofs_device << "End of device informations." << std::endl << std::endl; -// // } - -// // template <> -// // void record_device_memory(const base_device::DEVICE_GPU* ctx, -// // std::ofstream& ofs_device, -// // std::string str, -// // size_t size) -// // { -// // ofs_device << "Allocate " << static_cast(size) / 8 / 1024 / 1024 << " \tMB device memory\t" -// // << "from " << str << std::endl -// // << std::endl; -// // } - -// // #endif - -// // #if __MPI -// // int stringCmp(const void *a, const void* b) -// // { -// // char* m = (char*)a; -// // char* n = (char*)b; -// // int i, sum = 0; - -// // for(i = 0; i < MPI_MAX_PROCESSOR_NAME; i++) -// // if (m[i] == n[i]) -// // continue; -// // else -// // { -// // sum = m[i] - n[i]; -// // break; -// // } -// // return sum; -// // } - -// // int get_node_rank() { -// // char host_name[MPI_MAX_PROCESSOR_NAME]; -// // memset(host_name, '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME); -// // char (*host_names)[MPI_MAX_PROCESSOR_NAME]; -// // int n, namelen, color, rank, nprocs, myrank; -// // size_t bytes; -// // MPI_Comm nodeComm; - -// // MPI_Comm_rank(MPI_COMM_WORLD, &rank); -// // MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -// // MPI_Get_processor_name(host_name,&namelen); - -// // bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]); -// // host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes); -// // for (int ii = 0; ii < nprocs; ii++) { -// // memset(host_names[ii], '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME); -// // } - -// // strcpy(host_names[rank], host_name); - -// // for (n=0; n -// #include - -// #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 -// static __inline__ __device__ double atomicAdd(double *address, double val) { -// unsigned long long int *address_as_ull = (unsigned long long int *)address; -// unsigned long long int old = *address_as_ull, assumed; -// do { -// assumed = old; -// old = atomicCAS(address_as_ull, assumed, -// __double_as_longlong(val + __longlong_as_double(assumed))); -// // Note: uses integer comparison to avoid hang in case of NaN (since NaN != -// // NaN) } while (assumed != old); -// } while (assumed != old); -// return __longlong_as_double(old); -// } -// #endif - -// namespace psi { -// namespace device { - -// template -// base_device::AbacusDevice_t get_device_type(const Device* dev); - -// template std::string get_current_precision(const T* var); - -// template void print_device_info (const Device* dev, std::ofstream& ofs_device) {return;} - -// template void record_device_memory (const Device* dev, std::ofstream& ofs_device, std::string str, size_t size) {return;} - -// std::string get_device_info(std::string device_flag); - -// int get_device_kpar(const int& kpar); -// std::string get_device_flag(const std::string& device, const std::string& ks_solver, const std::string& basis_type, const bool& gamma_only); - -// #if __MPI -// int get_node_rank(); -// int stringCmp(const void *a, const void* b); -// #endif - -// #if ((defined __CUDA) || (defined __ROCM)) -// int get_device_num(); -// void set_device(const int rank); -// #endif - -// } // end of namespace device -// } // end of namespace psi - -// #endif // MODULE_PSI_DEVICE_H_ \ No newline at end of file diff --git a/source/module_psi/kernels/memory_op.h b/source/module_psi/kernels/memory_op.h deleted file mode 100644 index a8154426ef..0000000000 --- a/source/module_psi/kernels/memory_op.h +++ /dev/null @@ -1,234 +0,0 @@ -// // TODO: This is a temperary location for these functions. -// // And will be moved to a global module(module base) later. -// #ifndef MODULE_PSI_MEMORY_H_ -// #define MODULE_PSI_MEMORY_H_ - -// #include "module_base/module_device/types.h" - -// #include -// #include -// #include - -// namespace psi { -// namespace memory { - -// template -// struct resize_memory_op { -// /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. -// /// -// /// Input Parameters -// /// \param dev : the type of computing device -// /// \param size : array size -// /// \param record_string : label for memory record -// /// -// /// Output Parameters -// /// \param arr : allocated array -// void operator()(const Device* dev, FPTYPE*& arr, const size_t size, const char* record_in = nullptr); -// }; - -// template -// struct set_memory_op { -// /// @brief memset for multi-device -// /// -// /// Input Parameters -// /// \param dev : the type of computing device -// /// \param var : the specified constant value -// /// \param size : array size -// /// -// /// Output Parameters -// /// \param arr : output array initialized by the input value -// void operator()(const Device* dev, FPTYPE* arr, const int var, const size_t size); -// }; - -// template -// struct synchronize_memory_op { -// /// @brief memcpy for multi-device -// /// -// /// Input Parameters -// /// \param dev_out : the type of computing device of arr_out -// /// \param dev_in : the type of computing device of arr_in -// /// \param arr_in : input array -// /// \param size : array size -// /// -// /// Output Parameters -// /// \param arr_out : output array initialized by the input array -// void operator()( -// const Device_out* dev_out, -// const Device_in* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size); -// }; - -// template -// struct cast_memory_op { -// /// @brief memcpy for multi-device -// /// -// /// Input Parameters -// /// \param dev_out : the type of computing device of arr_out -// /// \param dev_in : the type of computing device of arr_in -// /// \param arr_in : input array -// /// \param size : array size -// /// -// /// Output Parameters -// /// \param arr_out : output array initialized by the input array -// void operator()( -// const Device_out* dev_out, -// const Device_in* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size); -// }; - -// template -// struct delete_memory_op { -// /// @brief free memory for multi-device -// /// -// /// Input Parameters -// /// \param dev : the type of computing device -// /// \param arr : the input array -// void operator()(const Device* dev, FPTYPE* arr); -// }; - -// #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -// // Partially specialize operator for base_device::GpuDevice. -// template -// struct resize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE*& arr, -// const size_t size, -// const char* record_in = nullptr); -// }; - -// template -// struct set_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr, const int var, const size_t size); -// }; - -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size); -// }; -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size); -// }; -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size); -// }; - -// template -// struct delete_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr); -// }; -// #endif -// // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM - -// } // end of namespace memory -// } // end of namespace psi - -// // using resmem_sh_op = base_device::memory::resize_memory_op; -// // using resmem_dh_op = base_device::memory::resize_memory_op; -// // using resmem_ch_op = base_device::memory::resize_memory_op, base_device::DEVICE_CPU>; -// // using resmem_zh_op = base_device::memory::resize_memory_op, base_device::DEVICE_CPU>; - -// // using resmem_sd_op = base_device::memory::resize_memory_op; -// // using resmem_dd_op = base_device::memory::resize_memory_op; -// // using resmem_cd_op = base_device::memory::resize_memory_op, base_device::DEVICE_GPU>; -// // using resmem_zd_op = base_device::memory::resize_memory_op, base_device::DEVICE_GPU>; - -// // using setmem_sh_op = base_device::memory::set_memory_op; -// // using setmem_dh_op = base_device::memory::set_memory_op; -// // using setmem_ch_op = base_device::memory::set_memory_op, base_device::DEVICE_CPU>; -// // using setmem_zh_op = base_device::memory::set_memory_op, base_device::DEVICE_CPU>; - -// // using setmem_sd_op = base_device::memory::set_memory_op; -// // using setmem_dd_op = base_device::memory::set_memory_op; -// // using setmem_cd_op = base_device::memory::set_memory_op, base_device::DEVICE_GPU>; -// // using setmem_zd_op = base_device::memory::set_memory_op, base_device::DEVICE_GPU>; - -// // using delmem_sh_op = base_device::memory::delete_memory_op; -// // using delmem_dh_op = base_device::memory::delete_memory_op; -// // using delmem_ch_op = base_device::memory::delete_memory_op, base_device::DEVICE_CPU>; -// // using delmem_zh_op = base_device::memory::delete_memory_op, base_device::DEVICE_CPU>; - -// // using delmem_sd_op = base_device::memory::delete_memory_op; -// // using delmem_dd_op = base_device::memory::delete_memory_op; -// // using delmem_cd_op = base_device::memory::delete_memory_op, base_device::DEVICE_GPU>; -// // using delmem_zd_op = base_device::memory::delete_memory_op, base_device::DEVICE_GPU>; - -// // using syncmem_s2s_h2h_op -// // = base_device::memory::synchronize_memory_op; -// // using syncmem_s2s_h2d_op -// // = base_device::memory::synchronize_memory_op; -// // using syncmem_s2s_d2h_op -// // = base_device::memory::synchronize_memory_op; -// // using syncmem_d2d_h2h_op -// // = base_device::memory::synchronize_memory_op; -// // using syncmem_d2d_h2d_op -// // = base_device::memory::synchronize_memory_op; -// // using syncmem_d2d_d2h_op -// // = base_device::memory::synchronize_memory_op; - -// // using syncmem_c2c_h2h_op -// // = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -// // using syncmem_c2c_h2d_op -// // = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// // using syncmem_c2c_d2h_op -// // = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// // using syncmem_z2z_h2h_op = base_device::memory:: -// // synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -// // using syncmem_z2z_h2d_op = base_device::memory:: -// // synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// // using syncmem_z2z_d2h_op = base_device::memory:: -// // synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; - -// // using castmem_s2d_h2h_op -// // = base_device::memory::cast_memory_op; -// // using castmem_s2d_h2d_op -// // = base_device::memory::cast_memory_op; -// // using castmem_s2d_d2h_op -// // = base_device::memory::cast_memory_op; -// // using castmem_d2s_h2h_op -// // = base_device::memory::cast_memory_op; -// // using castmem_d2s_h2d_op -// // = base_device::memory::cast_memory_op; -// // using castmem_d2s_d2h_op -// // = base_device::memory::cast_memory_op; - -// // using castmem_c2z_h2h_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -// // using castmem_c2z_h2d_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// // using castmem_c2z_d2h_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// // using castmem_z2c_h2h_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -// // using castmem_z2c_h2d_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// // using castmem_z2c_d2h_op = psi::memory:: -// // cast_memory_op, std::complex, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; - -// // static base_device::DEVICE_CPU* cpu_ctx = {}; -// // static base_device::DEVICE_GPU* gpu_ctx = {}; - -// #endif // MODULE_PSI_MEMORY_H_ \ No newline at end of file diff --git a/source/module_psi/kernels/psi_memory_op.cpp b/source/module_psi/kernels/psi_memory_op.cpp deleted file mode 100644 index 06b63ebd7b..0000000000 --- a/source/module_psi/kernels/psi_memory_op.cpp +++ /dev/null @@ -1,338 +0,0 @@ -// #include -// #include -// #include -// #include "module_base/module_device/types.h" -// #include "module_psi/kernels/memory_op.h" -// #include "module_base/memory.h" -// #include "module_base/tool_threading.h" - -// namespace psi{ -// namespace memory{ - -// template -// struct resize_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev, FPTYPE*& arr, const size_t size, const char* record_in) -// { -// if (arr != nullptr) -// { -// free(arr); -// } -// arr = (FPTYPE*)malloc(sizeof(FPTYPE) * size); -// std::string record_string; -// if (record_in != nullptr) -// { -// record_string = record_in; -// } -// else -// { -// record_string = "no_record"; -// } - -// if (record_string != "no_record") -// { -// ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); -// } -// } -// }; - -// template -// struct set_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr, const int var, const size_t size) -// { -// ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { -// int beg = 0, len = 0; -// ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); -// memset(arr + beg, var, sizeof(FPTYPE) * len); -// }); -// } -// }; - -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { -// int beg = 0, len = 0; -// ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); -// memcpy(arr_out + beg, arr_in + beg, sizeof(FPTYPE) * len); -// }); -// } -// }; - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// #ifdef _OPENMP -// #pragma omp parallel for schedule(static, 4096/sizeof(FPTYPE_out)) -// #endif -// for (int ii = 0; ii < size; ii++) { -// arr_out[ii] = static_cast(arr_in[ii]); -// } -// } -// }; - -// template -// struct delete_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr) -// { -// free(arr); -// } -// }; - -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op, base_device::DEVICE_CPU>; -// template struct resize_memory_op, base_device::DEVICE_CPU>; - -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op, base_device::DEVICE_CPU>; -// template struct set_memory_op, base_device::DEVICE_CPU>; - -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; - -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_CPU>; - -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op, base_device::DEVICE_CPU>; -// template struct delete_memory_op, base_device::DEVICE_CPU>; - -// #if !(defined(__CUDA) || defined(__ROCM)) -// template -// struct resize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE*& arr, -// const size_t size, -// const char* record_in = nullptr) -// { -// } -// }; - -// template -// struct set_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr, const int var, const size_t size) -// { -// } -// }; - -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct synchronize_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct cast_memory_op -// { -// void operator()(const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) -// { -// } -// }; - -// template -// struct delete_memory_op -// { -// void operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) -// { -// } -// }; - -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op, base_device::DEVICE_GPU>; -// template struct resize_memory_op, base_device::DEVICE_GPU>; - -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op, base_device::DEVICE_GPU>; -// template struct set_memory_op, base_device::DEVICE_GPU>; - -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; - -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; - -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op, base_device::DEVICE_GPU>; -// template struct delete_memory_op, base_device::DEVICE_GPU>; -// #endif - -// } -// } \ No newline at end of file diff --git a/source/module_psi/kernels/rocm/memory_op.hip.cu b/source/module_psi/kernels/rocm/memory_op.hip.cu deleted file mode 100644 index 03ed12632d..0000000000 --- a/source/module_psi/kernels/rocm/memory_op.hip.cu +++ /dev/null @@ -1,269 +0,0 @@ -// #include "module_psi/kernels/memory_op.h" - -// #include -// #include - -// #include -// #include -// #include - -// #define THREADS_PER_BLOCK 256 - -// namespace psi { -// namespace memory { - -// template -// __global__ void cast_memory( -// FPTYPE_out* out, -// const FPTYPE_in* in, -// const int size) -// { -// int idx = blockIdx.x * blockDim.x + threadIdx.x; -// if(idx >= size) {return;} -// out[idx] = static_cast(in[idx]); -// } - -// template -// __global__ void cast_memory( -// std::complex* out, -// const std::complex* in, -// const int size) -// { -// int idx = blockIdx.x * blockDim.x + threadIdx.x; -// if(idx >= size) {return;} -// auto* _out = reinterpret_cast*>(out); -// const auto* _in = reinterpret_cast*>(in); -// _out[idx] = static_cast>(_in[idx]); -// } - -// template -// void resize_memory_op::operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE*& arr, -// const size_t size, -// const char* record_in) -// { -// if (arr != nullptr) { -// delete_memory_op()(dev, arr); -// } -// hipErrcheck(hipMalloc((void **)&arr, sizeof(FPTYPE) * size)); -// } - -// template -// void set_memory_op::operator()(const base_device::DEVICE_GPU* dev, -// FPTYPE* arr, -// const int var, -// const size_t size) -// { -// hipErrcheck(hipMemset(arr, var, sizeof(FPTYPE) * size)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_CPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyDeviceToHost)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_CPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyHostToDevice)); -// } - -// template -// void synchronize_memory_op::operator()( -// const base_device::DEVICE_GPU* dev_out, -// const base_device::DEVICE_GPU* dev_in, -// FPTYPE* arr_out, -// const FPTYPE* arr_in, -// const size_t size) -// { -// hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyDeviceToDevice)); -// } - -// template -// struct cast_memory_op { -// void operator()(const psi::DEVICE_GPU* dev_out, -// const psi::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) { - -// if (size == 0) {return;} -// const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; -// hipLaunchKernelGGL(cast_memory, dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, arr_out, arr_in, size); -// hipErrcheck(hipGetLastError()); -// hipErrcheck(hipDeviceSynchronize()); -// } -// }; - -// template -// struct cast_memory_op { -// void operator()(const psi::DEVICE_GPU* dev_out, -// const psi::DEVICE_CPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) { - -// if (size == 0) {return;} -// // No need to cast the memory if the data types are the same. -// if (std::is_same::value) -// { -// synchronize_memory_op()(dev_out, -// dev_in, -// arr_out, -// reinterpret_cast(arr_in), -// size); -// return; -// } -// FPTYPE_in * arr = nullptr; -// hipErrcheck(hipMalloc((void **)&arr, sizeof(FPTYPE_in) * size)); -// hipErrcheck(hipMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, hipMemcpyHostToDevice)); -// const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; -// hipLaunchKernelGGL(cast_memory, dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, arr_out, arr, size); -// hipErrcheck(hipGetLastError()); -// hipErrcheck(hipDeviceSynchronize()); -// hipErrcheck(hipFree(arr)); -// } -// }; - -// template -// struct cast_memory_op { -// void operator()(const psi::DEVICE_CPU* dev_out, -// const psi::DEVICE_GPU* dev_in, -// FPTYPE_out* arr_out, -// const FPTYPE_in* arr_in, -// const size_t size) { - -// if (size == 0) {return;} -// // No need to cast the memory if the data types are the same. -// if (std::is_same::value) -// { -// synchronize_memory_op()(dev_out, -// dev_in, -// arr_out, -// reinterpret_cast(arr_in), -// size); -// return; -// } -// auto * arr = (FPTYPE_in*) malloc(sizeof(FPTYPE_in) * size); -// hipErrcheck(hipMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, hipMemcpyDeviceToHost)); -// for (int ii = 0; ii < size; ii++) { -// arr_out[ii] = static_cast(arr[ii]); -// } -// free(arr); -// } -// }; - -// template -// void delete_memory_op::operator()(const base_device::DEVICE_GPU* dev, FPTYPE* arr) -// { -// hipErrcheck(hipFree(arr)); -// } - -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op; -// template struct resize_memory_op, base_device::DEVICE_GPU>; -// template struct resize_memory_op, base_device::DEVICE_GPU>; - -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op; -// template struct set_memory_op, base_device::DEVICE_GPU>; -// template struct set_memory_op, base_device::DEVICE_GPU>; - -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -// template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; - -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_GPU, -// base_device::DEVICE_CPU>; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; -// template struct cast_memory_op, -// std::complex, -// base_device::DEVICE_CPU, -// base_device::DEVICE_GPU>; - -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op; -// template struct delete_memory_op, base_device::DEVICE_GPU>; -// template struct delete_memory_op, base_device::DEVICE_GPU>; - -// } // end of namespace gpu_cuda -// } // end of namespace psi \ No newline at end of file diff --git a/source/module_psi/kernels/test/CMakeLists.txt b/source/module_psi/kernels/test/CMakeLists.txt deleted file mode 100644 index 2c09c8741b..0000000000 --- a/source/module_psi/kernels/test/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# AddTest( -# TARGET Module_Psi_UTs -# LIBS ${math_libs} base device -# SOURCES memory_test.cpp device_test.cpp -# ) \ No newline at end of file diff --git a/source/module_psi/kernels/test/device_test.cpp b/source/module_psi/kernels/test/device_test.cpp deleted file mode 100644 index b5aff72fd8..0000000000 --- a/source/module_psi/kernels/test/device_test.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include "module_base/module_device/types.h" - -class TestModulePsiDevice : public ::testing::Test -{ - protected: - const base_device::DEVICE_CPU* cpu_ctx = {}; - const base_device::DEVICE_GPU* gpu_ctx = {}; - - void SetUp() override { - } - void TearDown() override { - } -}; - -TEST_F(TestModulePsiDevice, get_device_type_cpu) -{ - base_device::AbacusDevice_t device = psi::device::get_device_type(cpu_ctx); - EXPECT_EQ(device, base_device::CpuDevice); -} - -#if __UT_USE_CUDA || __UT_USE_ROCM -TEST_F(TestModulePsiDevice, get_device_type_gpu) -{ - base_device::AbacusDevice_t device = psi::device::get_device_type(gpu_ctx); - EXPECT_EQ(device, base_device::GpuDevice); -} -#endif // __UT_USE_CUDA || __UT_USE_ROCM - diff --git a/source/module_psi/kernels/test/memory_test.cpp b/source/module_psi/kernels/test/memory_test.cpp deleted file mode 100644 index d154cbc4fd..0000000000 --- a/source/module_psi/kernels/test/memory_test.cpp +++ /dev/null @@ -1,349 +0,0 @@ -#include -#include -#include -#include "module_psi/kernels/memory_op.h" -#if __UT_USE_CUDA || __UT_USE_ROCM -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -class TestModulePsiMemory : public ::testing::Test -{ - protected: - const std::vector xx = { - -0.65412617, -0.74208893, -2.21731157, 0.42540039, - 3.36373004, -2.51647562, -2.985111 , -0.53251562, - 0.37908265, 0.81605825, 1.66281318, 2.71761869, - 2.2010268 , 0.65498149, 1.51153638, 0.71501482, - 0.53546578, 1.4564317 , -2.36701143, 1.23009056, - 3.41302551, -2.3175205 , -0.27628221, -1.35701656 - }; - - const std::vector > z_xx = { - {-0.65412617, -0.74208893}, {-2.21731157, 0.42540039}, - {3.36373004, -2.51647562}, {-2.985111 , -0.53251562}, - {0.37908265, 0.81605825}, { 1.66281318, 2.71761869}, - {2.2010268 , 0.65498149}, { 1.51153638, 0.71501482}, - {0.53546578, 1.4564317 }, {-2.36701143, 1.23009056}, - {3.41302551, -2.3175205 }, {-0.27628221, -1.35701656} - }; - - const int z_dim = z_xx.size(); - - const base_device::DEVICE_CPU* cpu_ctx = {}; - const base_device::DEVICE_GPU* gpu_ctx = {}; - - void SetUp() override { - } - void TearDown() override { - } - - using set_memory_double_cpu_op = base_device::memory::set_memory_op; - using set_memory_complex_double_cpu_op - = base_device::memory::set_memory_op, base_device::DEVICE_CPU>; - using resize_memory_double_cpu_op = base_device::memory::resize_memory_op; - using resize_memory_comlex_double_cpu_op - = base_device::memory::resize_memory_op, base_device::DEVICE_CPU>; - using synchronize_memory_double_cpu_to_cpu_op - = base_device::memory::synchronize_memory_op; - using synchronize_memory_complex_double_cpu_to_cpu_op = base_device::memory:: - synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; - using delete_memory_double_cpu_op = base_device::memory::delete_memory_op; - using delete_memory_complex_double_cpu_op - = base_device::memory::delete_memory_op, base_device::DEVICE_CPU>; - -#if __UT_USE_CUDA || __UT_USE_ROCM - using set_memory_double_gpu_op = base_device::memory::set_memory_op; - using set_memory_complex_double_gpu_op - = base_device::memory::set_memory_op, base_device::DEVICE_GPU>; - using resize_memory_double_gpu_op = base_device::memory::resize_memory_op; - using resize_memory_comlex_double_gpu_op - = base_device::memory::resize_memory_op, base_device::DEVICE_GPU>; - using synchronize_memory_double_cpu_to_gpu_op - = base_device::memory::synchronize_memory_op; - using synchronize_memory_double_gpu_to_cpu_op - = base_device::memory::synchronize_memory_op; - using synchronize_memory_double_gpu_to_gpu_op - = base_device::memory::synchronize_memory_op; - using synchronize_memory_complex_double_cpu_to_gpu_op = base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; - using synchronize_memory_complex_double_gpu_to_cpu_op = base_device::memory:: - synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; - using synchronize_memory_complex_double_gpu_to_gpu_op = base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; - using delete_memory_double_gpu_op = base_device::memory::delete_memory_op; - using delete_memory_complex_double_gpu_op - = base_device::memory::delete_memory_op, base_device::DEVICE_GPU>; -#endif // __UT_USE_CUDA || __UT_USE_ROCM -}; - -TEST_F(TestModulePsiMemory, set_memory_op_double_cpu) -{ - std::vector v_xx = xx; - set_memory_double_cpu_op()(cpu_ctx, v_xx.data(), 0, xx.size()); - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(v_xx[ii], 0.0); - } -} - -TEST_F(TestModulePsiMemory, set_memory_op_complex_double_cpu) -{ - std::vector> vz_xx = z_xx; - set_memory_complex_double_cpu_op()(cpu_ctx, vz_xx.data(), 0, z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(vz_xx[ii], std::complex(0.0, 0.0)); - } -} - -TEST_F(TestModulePsiMemory, resize_memory_op_double_cpu) -{ - double* xx_tmp = NULL; - resize_memory_double_cpu_op()(cpu_ctx, xx_tmp, xx.size()); - for (int ii = 0; ii < xx.size(); ii++) { - xx_tmp[ii] = xx[ii]; - } - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(xx_tmp[ii], xx[ii]); - } - free(xx_tmp); -} - -TEST_F(TestModulePsiMemory, resize_memory_op_comlex_double_cpu) -{ - std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_cpu_op()(cpu_ctx, z_xx_tmp, z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) { - z_xx_tmp[ii] = z_xx[ii]; - } - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(z_xx_tmp[ii], z_xx[ii]); - } - free(z_xx_tmp); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_cpu) -{ - std::vector h_xx(xx.size(), 0); - synchronize_memory_double_cpu_to_cpu_op()(cpu_ctx, cpu_ctx, h_xx.data(), xx.data(), xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], xx[ii]); - } -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_cpu) -{ - std::vector> hz_xx(z_xx.size(), std::complex(0, 0)); - synchronize_memory_complex_double_cpu_to_cpu_op()(cpu_ctx, cpu_ctx, hz_xx.data(), z_xx.data(), z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } -} - -TEST_F(TestModulePsiMemory, delete_memory_op_double_cpu) -{ - double * h_xx = (double*)malloc(sizeof(double) * xx.size()); - delete_memory_double_cpu_op()(cpu_ctx, h_xx); -} - -TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_cpu) -{ - std::complex * hz_xx = (std::complex*)malloc(sizeof(std::complex) * z_xx.size()); - delete_memory_complex_double_cpu_op()(cpu_ctx, hz_xx); -} - - -#if __UT_USE_CUDA || __UT_USE_ROCM -TEST_F(TestModulePsiMemory, set_memory_op_double_gpu) -{ - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d_xx); - set_memory_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(d_xx), 0, xx.size()); - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], 0.0); - } -} - -TEST_F(TestModulePsiMemory, set_memory_op_complex_double_gpu) -{ - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - set_memory_complex_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(dz_xx), 0, z_xx.size()); - thrust::host_vector> h_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), h_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], std::complex(0.0, 0.0)); - } -} - -TEST_F(TestModulePsiMemory, resize_memory_op_double_gpu) -{ - double* xx_tmp = NULL; - resize_memory_double_gpu_op()(gpu_ctx, xx_tmp, xx.size()); - - thrust::device_ptr d_xx(xx_tmp); - thrust::copy(xx.begin(), xx.end(), d_xx); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); -} - -TEST_F(TestModulePsiMemory, resize_memory_op_complex_double_gpu) -{ - std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_gpu_op()(gpu_ctx, z_xx_tmp, z_xx.size()); - - thrust::device_ptr> dz_xx(z_xx_tmp); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - - thrust::host_vector> h_z_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), h_z_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(h_z_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_gpu) -{ - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - std::vector hv_xx(xx.size(), 0); - thrust::copy(hv_xx.begin(), hv_xx.end(), d_xx); - synchronize_memory_double_cpu_to_gpu_op()( - gpu_ctx, - cpu_ctx, - thrust::raw_pointer_cast(d_xx), - xx.data(), - xx.size()); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_cpu) -{ - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d_xx); - thrust::host_vector h_xx(xx.size()); - synchronize_memory_double_gpu_to_cpu_op()( - cpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(h_xx.data()), - thrust::raw_pointer_cast(d_xx), - xx.size()); - - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_gpu) -{ - thrust::device_ptr d1_xx = thrust::device_malloc(xx.size()); - thrust::device_ptr d2_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d1_xx); - synchronize_memory_double_gpu_to_gpu_op()( - gpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(d2_xx), - thrust::raw_pointer_cast(d1_xx), - xx.size()); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d2_xx, d2_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(thrust::device_ptr(d1_xx)); - thrust::device_free(thrust::device_ptr(d2_xx)); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_gpu) -{ - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - std::vector> hvz_xx(z_xx.size(), 0); - thrust::copy(hvz_xx.begin(), hvz_xx.end(), dz_xx); - synchronize_memory_complex_double_cpu_to_gpu_op()( - gpu_ctx, - cpu_ctx, - thrust::raw_pointer_cast(dz_xx), - z_xx.data(), - z_xx.size()); - - thrust::host_vector> hz_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), hz_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_cpu) -{ - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - thrust::host_vector> hz_xx(z_xx.size()); - synchronize_memory_complex_double_gpu_to_cpu_op()( - cpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(hz_xx.data()), - thrust::raw_pointer_cast(dz_xx), - z_xx.size()); - - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); -} - -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_gpu) -{ - thrust::device_ptr> dz1_xx = thrust::device_malloc>(z_xx.size()); - thrust::device_ptr> dz2_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz1_xx); - synchronize_memory_complex_double_gpu_to_gpu_op()( - gpu_ctx, - gpu_ctx, - thrust::raw_pointer_cast(dz2_xx), - thrust::raw_pointer_cast(dz1_xx), - z_xx.size()); - - thrust::host_vector> h_xx(z_xx.size()); - thrust::copy(dz2_xx, dz2_xx + z_xx.size(), h_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) { - EXPECT_EQ(h_xx[ii], z_xx[ii]); - } - thrust::device_free(thrust::device_ptr>(dz1_xx)); - thrust::device_free(thrust::device_ptr>(dz2_xx)); -} - -TEST_F(TestModulePsiMemory, delete_memory_op_double_gpu) -{ - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - delete_memory_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(d_xx)); -} - -TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_gpu) -{ - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - delete_memory_complex_double_gpu_op()(gpu_ctx, thrust::raw_pointer_cast(dz_xx)); -} - -#endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/module_psi/kernels/types.h b/source/module_psi/kernels/types.h deleted file mode 100644 index 6c07620d17..0000000000 --- a/source/module_psi/kernels/types.h +++ /dev/null @@ -1,16 +0,0 @@ -// // TODO: This is a temperary location for these functions. -// // And will be moved to a global module(module base) later. -// #ifndef MODULE_PSI_TYPES_H_ -// #define MODULE_PSI_TYPES_H_ - -// namespace psi { - -// struct DEVICE_CPU; -// struct DEVICE_GPU; -// struct DEVICE_SYCL; - -// enum base_device::AbacusDevice_t {UnKnown, CpuDevice, GpuDevice, SyclDevice}; - -// } // end of namespace psi - -// #endif // MODULE_PSI_TYPES_H_ \ No newline at end of file From 3538efd2b34d7fdf3877756f98f49d16ab017d3b Mon Sep 17 00:00:00 2001 From: haozhihan Date: Fri, 14 Jun 2024 20:05:10 +0800 Subject: [PATCH 2/2] fix build bug --- source/module_base/kernels/test/math_op_test.cpp | 1 - source/module_base/math_ylmreal.cpp | 1 - source/module_basis/module_pw/kernels/test/pw_op_test.cpp | 1 - source/module_elecstate/kernels/test/elecstate_op_test.cpp | 1 - source/module_hamilt_pw/hamilt_pwdft/forces.h | 1 - .../hamilt_pwdft/kernels/test/ekinetic_op_test.cpp | 1 - .../module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp | 1 - .../module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp | 1 - .../hamilt_pwdft/kernels/test/nonlocal_op_test.cpp | 1 - .../hamilt_pwdft/kernels/test/stress_op_test.cpp | 1 - .../module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp | 1 - .../module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp | 1 - source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp | 1 - source/module_hsolver/diago_bpcg.h | 1 - source/module_hsolver/kernels/cuda/math_kernel_op.cu | 1 - source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu | 1 - source/module_hsolver/kernels/test/math_dngvd_test.cpp | 1 - source/module_hsolver/kernels/test/math_kernel_test.cpp | 1 - source/module_hsolver/kernels/test/perf_math_kernel.cpp | 1 - source/module_psi/psi.h | 1 - 20 files changed, 20 deletions(-) diff --git a/source/module_base/kernels/test/math_op_test.cpp b/source/module_base/kernels/test/math_op_test.cpp index ea736c97c9..7136ab8d35 100644 --- a/source/module_base/kernels/test/math_op_test.cpp +++ b/source/module_base/kernels/test/math_op_test.cpp @@ -1,7 +1,6 @@ #include "module_base/kernels/math_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_base/math_ylmreal.cpp b/source/module_base/math_ylmreal.cpp index ecd9aebbf4..331b867c93 100644 --- a/source/module_base/math_ylmreal.cpp +++ b/source/module_base/math_ylmreal.cpp @@ -4,7 +4,6 @@ #include "module_base/kernels/math_op.h" #include "module_base/libm/libm.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include "realarray.h" #include "timer.h" #include "tool_quit.h" diff --git a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp index 62d94adc37..96cc760383 100644 --- a/source/module_basis/module_pw/kernels/test/pw_op_test.cpp +++ b/source/module_basis/module_pw/kernels/test/pw_op_test.cpp @@ -1,7 +1,6 @@ #include "module_basis/module_pw/kernels/pw_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_elecstate/kernels/test/elecstate_op_test.cpp b/source/module_elecstate/kernels/test/elecstate_op_test.cpp index 58018a836a..79635c7895 100644 --- a/source/module_elecstate/kernels/test/elecstate_op_test.cpp +++ b/source/module_elecstate/kernels/test/elecstate_op_test.cpp @@ -1,7 +1,6 @@ #include "module_elecstate/kernels/elecstate_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.h b/source/module_hamilt_pw/hamilt_pwdft/forces.h index 2f067e92ec..979620d7ec 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces.h +++ b/source/module_hamilt_pw/hamilt_pwdft/forces.h @@ -11,7 +11,6 @@ #include "module_elecstate/elecstate.h" #include "module_hamilt_pw/hamilt_pwdft/kernels/force_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -// #include "module_psi/kernels/memory_op.h" #include "module_psi/psi.h" #include "structure_factor.h" diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp index fd4126801a..03a5792bed 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/ekinetic_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/ekinetic_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp index 089f2c720f..fbe10d9af3 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/force_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/force_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp index 58790c8c2b..d9e9244004 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/meta_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/meta_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp index ebecc24c24..8591182d4b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/nonlocal_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/nonlocal_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp index 96cc2d9d77..a6e482cdae 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp index 747c02ce93..318646f063 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/veff_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/veff_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp index be4df99e1a..77ec09beac 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/vnl_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/vnl_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp index 09ec5ee1c0..8b46679d67 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/wf_op_test.cpp @@ -1,7 +1,6 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/wf_op.h" #include "module_base/module_device/memory_op.h" -// #include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hsolver/diago_bpcg.h b/source/module_hsolver/diago_bpcg.h index 75b4474b36..71040960c3 100644 --- a/source/module_hsolver/diago_bpcg.h +++ b/source/module_hsolver/diago_bpcg.h @@ -6,7 +6,6 @@ #include "module_hamilt_pw/hamilt_pwdft/structure_factor.h" #include "module_base/module_device/types.h" -// #include "module_psi/kernels/memory_op.h" #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index a731930628..ed1b9379f9 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -1,6 +1,5 @@ #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include "module_psi/kernels/memory_op.h" #include "module_psi/psi.h" #include "module_base/tool_quit.h" diff --git a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu index 2d83f4172f..89cf59f6fa 100644 --- a/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu +++ b/source/module_hsolver/kernels/rocm/math_kernel_op.hip.cu @@ -1,6 +1,5 @@ #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include "module_psi/kernels/memory_op.h" #include "module_psi/psi.h" #include "module_base/tool_quit.h" diff --git a/source/module_hsolver/kernels/test/math_dngvd_test.cpp b/source/module_hsolver/kernels/test/math_dngvd_test.cpp index 230ab05afd..8b614ae9a0 100644 --- a/source/module_hsolver/kernels/test/math_dngvd_test.cpp +++ b/source/module_hsolver/kernels/test/math_dngvd_test.cpp @@ -3,7 +3,6 @@ #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/dngvd_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hsolver/kernels/test/math_kernel_test.cpp b/source/module_hsolver/kernels/test/math_kernel_test.cpp index 56d7309d45..c2c66fb936 100644 --- a/source/module_hsolver/kernels/test/math_kernel_test.cpp +++ b/source/module_hsolver/kernels/test/math_kernel_test.cpp @@ -2,7 +2,6 @@ #include "module_base/constants.h" #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_hsolver/kernels/test/perf_math_kernel.cpp b/source/module_hsolver/kernels/test/perf_math_kernel.cpp index da5994e1a9..173ef8b40b 100644 --- a/source/module_hsolver/kernels/test/perf_math_kernel.cpp +++ b/source/module_hsolver/kernels/test/perf_math_kernel.cpp @@ -2,7 +2,6 @@ #include "module_base/constants.h" #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include "module_psi/kernels/memory_op.h" #include #include diff --git a/source/module_psi/psi.h b/source/module_psi/psi.h index 95ca8028e2..39956321fd 100644 --- a/source/module_psi/psi.h +++ b/source/module_psi/psi.h @@ -3,7 +3,6 @@ #include "module_base/module_device/memory_op.h" #include "module_base/module_device/types.h" -// #include "module_psi/kernels/memory_op.h" #include