From 854b31a064a575d4058fc1fa66479e0273b90566 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Fri, 12 May 2023 09:47:13 +0000 Subject: [PATCH 1/4] Add openmp nvector to idaklu --- FindSUNDIALS.cmake | 1 + pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp | 12 ++++++++++++ pybamm/solvers/c_solvers/idaklu/common.hpp | 1 + scripts/install_KLU_Sundials.py | 5 +++-- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/FindSUNDIALS.cmake b/FindSUNDIALS.cmake index 92a9b785e9..4f8c52b64f 100644 --- a/FindSUNDIALS.cmake +++ b/FindSUNDIALS.cmake @@ -51,6 +51,7 @@ set(SUNDIALS_WANT_COMPONENTS sundials_sunlinsollapackdense sundials_sunmatrixsparse sundials_nvecserial + sundials_nvecopenmp ) # find the SUNDIALS libraries diff --git a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp index d10d0bdbf6..9742f60e2e 100644 --- a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp +++ b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp @@ -3,6 +3,8 @@ #include "common.hpp" #include +#define OPENMP 1 + CasadiSolver * create_casadi_solver(int number_of_states, int number_of_parameters, const Function &rhs_alg, const Function &jac_times_cjmass, @@ -54,10 +56,20 @@ CasadiSolver::CasadiSolver(np_array atol_np, double rel_tol, // allocate vectors #if SUNDIALS_VERSION_MAJOR >= 6 +# ifdef OPENMP + DEBUG("CasadiSolver::CasadiSolver --- Pthread version"); + int num_threads = 4; + yy = N_VNew_OpenMP(number_of_states, num_threads, sunctx); + yp = N_VNew_OpenMP(number_of_states, num_threads, sunctx); + avtol = N_VNew_OpenMP(number_of_states, num_threads, sunctx); + id = N_VNew_OpenMP(number_of_states, num_threads, sunctx); +# else + DEBUG("CasadiSolver::CasadiSolver --- serial version"); yy = N_VNew_Serial(number_of_states, sunctx); yp = N_VNew_Serial(number_of_states, sunctx); avtol = N_VNew_Serial(number_of_states, sunctx); id = N_VNew_Serial(number_of_states, sunctx); +# endif #else yy = N_VNew_Serial(number_of_states); yp = N_VNew_Serial(number_of_states); diff --git a/pybamm/solvers/c_solvers/idaklu/common.hpp b/pybamm/solvers/c_solvers/idaklu/common.hpp index b1947654ea..1931f8cb61 100644 --- a/pybamm/solvers/c_solvers/idaklu/common.hpp +++ b/pybamm/solvers/c_solvers/idaklu/common.hpp @@ -5,6 +5,7 @@ #include /* access to IDABBDPRE preconditioner */ #include /* access to serial N_Vector */ +#include /* access to openmp N_Vector */ #include /* defs. of SUNRabs, SUNRexp, etc. */ #include /* defs. of SUNRabs, SUNRexp, etc. */ #include /* defs. of realtype, sunindextype */ diff --git a/scripts/install_KLU_Sundials.py b/scripts/install_KLU_Sundials.py index 71f9936169..63a035d8fa 100755 --- a/scripts/install_KLU_Sundials.py +++ b/scripts/install_KLU_Sundials.py @@ -101,10 +101,11 @@ def download_extract_library(url, download_dir): KLU_INCLUDE_DIR = os.path.join(install_dir, "include") KLU_LIBRARY_DIR = os.path.join(install_dir, "lib") cmake_args = [ - "-DLAPACK_ENABLE=ON", + "-DENABLE_LAPACK=ON", "-DSUNDIALS_INDEX_SIZE=32", "-DEXAMPLES_ENABLE:BOOL=OFF", - "-DKLU_ENABLE=ON", + "-DENABLE_KLU=ON", + "-DENABLE_OPENMP=ON", "-DKLU_INCLUDE_DIR={}".format(KLU_INCLUDE_DIR), "-DKLU_LIBRARY_DIR={}".format(KLU_LIBRARY_DIR), "-DCMAKE_INSTALL_PREFIX=" + install_dir, From f023f8179ea8fffdf1535525869c95c77cdd819d Mon Sep 17 00:00:00 2001 From: John Brittain Date: Fri, 12 May 2023 10:31:27 +0000 Subject: [PATCH 2/4] Add num_threads to idaklu solver options --- pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp | 4 +++- pybamm/solvers/c_solvers/idaklu/options.cpp | 3 ++- pybamm/solvers/c_solvers/idaklu/options.hpp | 1 + pybamm/solvers/idaklu_solver.py | 4 ++++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp index 9742f60e2e..53560a55f9 100644 --- a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp +++ b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp @@ -3,6 +3,7 @@ #include "common.hpp" #include +#include #define OPENMP 1 CasadiSolver * @@ -58,7 +59,8 @@ CasadiSolver::CasadiSolver(np_array atol_np, double rel_tol, #if SUNDIALS_VERSION_MAJOR >= 6 # ifdef OPENMP DEBUG("CasadiSolver::CasadiSolver --- Pthread version"); - int num_threads = 4; + int num_threads = options.num_threads; + DEBUG(num_threads); yy = N_VNew_OpenMP(number_of_states, num_threads, sunctx); yp = N_VNew_OpenMP(number_of_states, num_threads, sunctx); avtol = N_VNew_OpenMP(number_of_states, num_threads, sunctx); diff --git a/pybamm/solvers/c_solvers/idaklu/options.cpp b/pybamm/solvers/c_solvers/idaklu/options.cpp index f5d1d8c79e..2a5ed58d9c 100644 --- a/pybamm/solvers/c_solvers/idaklu/options.cpp +++ b/pybamm/solvers/c_solvers/idaklu/options.cpp @@ -12,7 +12,8 @@ Options::Options(py::dict options) linsol_max_iterations(options["linsol_max_iterations"].cast()), linear_solver(options["linear_solver"].cast()), precon_half_bandwidth(options["precon_half_bandwidth"].cast()), - precon_half_bandwidth_keep(options["precon_half_bandwidth_keep"].cast()) + precon_half_bandwidth_keep(options["precon_half_bandwidth_keep"].cast()), + num_threads(options["num_threads"].cast()) { using_sparse_matrix = true; diff --git a/pybamm/solvers/c_solvers/idaklu/options.hpp b/pybamm/solvers/c_solvers/idaklu/options.hpp index ecaffe8307..30e6fc634d 100644 --- a/pybamm/solvers/c_solvers/idaklu/options.hpp +++ b/pybamm/solvers/c_solvers/idaklu/options.hpp @@ -14,6 +14,7 @@ struct Options { int linsol_max_iterations; int precon_half_bandwidth; int precon_half_bandwidth_keep; + int num_threads; explicit Options(py::dict options); }; diff --git a/pybamm/solvers/idaklu_solver.py b/pybamm/solvers/idaklu_solver.py index 40f5acb7a7..2dcf4f3bdd 100644 --- a/pybamm/solvers/idaklu_solver.py +++ b/pybamm/solvers/idaklu_solver.py @@ -74,6 +74,9 @@ class IDAKLUSolver(pybamm.BaseSolver): # for iterative linear solver preconditioner, bandwidth of # approximate jacobian that is kept "precon_half_bandwidth_keep": 5 + + # Number of threads available for OpenMP + "num_threads": 1 } Note: These options only have an effect if model.convert_to_format == 'casadi' @@ -100,6 +103,7 @@ def __init__( "linsol_max_iterations": 5, "precon_half_bandwidth": 5, "precon_half_bandwidth_keep": 5, + "num_threads": 1, } if options is None: options = default_options From 49b8fa0dce55deb0839a5a579bf37e34e1ee80d9 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Fri, 12 May 2023 13:17:50 +0000 Subject: [PATCH 3/4] Replace serial (NV_DATA_S) with OpenMP (NV_DATA_OMP) data structures in idaklu solver --- .../c_solvers/idaklu/casadi_solver.cpp | 22 +++--------- .../idaklu/casadi_sundials_functions.cpp | 36 +++++++++---------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp index 53560a55f9..d91ce79190 100644 --- a/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp +++ b/pybamm/solvers/c_solvers/idaklu/casadi_solver.cpp @@ -3,8 +3,6 @@ #include "common.hpp" #include -#include -#define OPENMP 1 CasadiSolver * create_casadi_solver(int number_of_states, int number_of_parameters, @@ -56,27 +54,17 @@ CasadiSolver::CasadiSolver(np_array atol_np, double rel_tol, #endif // allocate vectors -#if SUNDIALS_VERSION_MAJOR >= 6 -# ifdef OPENMP - DEBUG("CasadiSolver::CasadiSolver --- Pthread version"); int num_threads = options.num_threads; - DEBUG(num_threads); +#if SUNDIALS_VERSION_MAJOR >= 6 yy = N_VNew_OpenMP(number_of_states, num_threads, sunctx); yp = N_VNew_OpenMP(number_of_states, num_threads, sunctx); avtol = N_VNew_OpenMP(number_of_states, num_threads, sunctx); id = N_VNew_OpenMP(number_of_states, num_threads, sunctx); -# else - DEBUG("CasadiSolver::CasadiSolver --- serial version"); - yy = N_VNew_Serial(number_of_states, sunctx); - yp = N_VNew_Serial(number_of_states, sunctx); - avtol = N_VNew_Serial(number_of_states, sunctx); - id = N_VNew_Serial(number_of_states, sunctx); -# endif #else - yy = N_VNew_Serial(number_of_states); - yp = N_VNew_Serial(number_of_states); - avtol = N_VNew_Serial(number_of_states); - id = N_VNew_Serial(number_of_states); + yy = N_VNew_OpenMP(number_of_states, num_threads); + yp = N_VNew_OpenMP(number_of_states, num_threads); + avtol = N_VNew_OpenMP(number_of_states, num_threads); + id = N_VNew_OpenMP(number_of_states, num_threads); #endif if (number_of_parameters > 0) diff --git a/pybamm/solvers/c_solvers/idaklu/casadi_sundials_functions.cpp b/pybamm/solvers/c_solvers/idaklu/casadi_sundials_functions.cpp index 031ef67d20..a88aac559e 100644 --- a/pybamm/solvers/c_solvers/idaklu/casadi_sundials_functions.cpp +++ b/pybamm/solvers/c_solvers/idaklu/casadi_sundials_functions.cpp @@ -10,19 +10,19 @@ int residual_casadi(realtype tres, N_Vector yy, N_Vector yp, N_Vector rr, static_cast(user_data); p_python_functions->rhs_alg.m_arg[0] = &tres; - p_python_functions->rhs_alg.m_arg[1] = NV_DATA_S(yy); + p_python_functions->rhs_alg.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->rhs_alg.m_arg[2] = p_python_functions->inputs.data(); - p_python_functions->rhs_alg.m_res[0] = NV_DATA_S(rr); + p_python_functions->rhs_alg.m_res[0] = NV_DATA_OMP(rr); p_python_functions->rhs_alg(); realtype *tmp = p_python_functions->get_tmp_state_vector(); - p_python_functions->mass_action.m_arg[0] = NV_DATA_S(yp); + p_python_functions->mass_action.m_arg[0] = NV_DATA_OMP(yp); p_python_functions->mass_action.m_res[0] = tmp; p_python_functions->mass_action(); // AXPY: y <- a*x + y const int ns = p_python_functions->number_of_states; - casadi::casadi_axpy(ns, -1., tmp, NV_DATA_S(rr)); + casadi::casadi_axpy(ns, -1., tmp, NV_DATA_OMP(rr)); DEBUG_VECTOR(yy); DEBUG_VECTOR(yp); @@ -101,22 +101,22 @@ int jtimes_casadi(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr, // Jv has ∂F/∂y v p_python_functions->jac_action.m_arg[0] = &tt; - p_python_functions->jac_action.m_arg[1] = NV_DATA_S(yy); + p_python_functions->jac_action.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->jac_action.m_arg[2] = p_python_functions->inputs.data(); - p_python_functions->jac_action.m_arg[3] = NV_DATA_S(v); - p_python_functions->jac_action.m_res[0] = NV_DATA_S(Jv); + p_python_functions->jac_action.m_arg[3] = NV_DATA_OMP(v); + p_python_functions->jac_action.m_res[0] = NV_DATA_OMP(Jv); p_python_functions->jac_action(); // tmp has -∂F/∂y˙ v realtype *tmp = p_python_functions->get_tmp_state_vector(); - p_python_functions->mass_action.m_arg[0] = NV_DATA_S(v); + p_python_functions->mass_action.m_arg[0] = NV_DATA_OMP(v); p_python_functions->mass_action.m_res[0] = tmp; p_python_functions->mass_action(); // AXPY: y <- a*x + y // Jv has ∂F/∂y v + cj ∂F/∂y˙ v const int ns = p_python_functions->number_of_states; - casadi::casadi_axpy(ns, -cj, tmp, NV_DATA_S(Jv)); + casadi::casadi_axpy(ns, -cj, tmp, NV_DATA_OMP(Jv)); return 0; } @@ -163,7 +163,7 @@ int jacobian_casadi(realtype tt, realtype cj, N_Vector yy, N_Vector yp, // args are t, y, cj, put result in jacobian data matrix p_python_functions->jac_times_cjmass.m_arg[0] = &tt; - p_python_functions->jac_times_cjmass.m_arg[1] = NV_DATA_S(yy); + p_python_functions->jac_times_cjmass.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->jac_times_cjmass.m_arg[2] = p_python_functions->inputs.data(); p_python_functions->jac_times_cjmass.m_arg[3] = &cj; @@ -227,7 +227,7 @@ int events_casadi(realtype t, N_Vector yy, N_Vector yp, realtype *events_ptr, // args are t, y, put result in events_ptr p_python_functions->events.m_arg[0] = &t; - p_python_functions->events.m_arg[1] = NV_DATA_S(yy); + p_python_functions->events.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->events.m_arg[2] = p_python_functions->inputs.data(); p_python_functions->events.m_res[0] = events_ptr; p_python_functions->events(); @@ -270,11 +270,11 @@ int sensitivities_casadi(int Ns, realtype t, N_Vector yy, N_Vector yp, // args are t, y put result in rr p_python_functions->sens.m_arg[0] = &t; - p_python_functions->sens.m_arg[1] = NV_DATA_S(yy); + p_python_functions->sens.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->sens.m_arg[2] = p_python_functions->inputs.data(); for (int i = 0; i < np; i++) { - p_python_functions->sens.m_res[i] = NV_DATA_S(resvalS[i]); + p_python_functions->sens.m_res[i] = NV_DATA_OMP(resvalS[i]); } // resvalsS now has (∂F/∂p i ) p_python_functions->sens(); @@ -284,23 +284,23 @@ int sensitivities_casadi(int Ns, realtype t, N_Vector yy, N_Vector yp, // put (∂F/∂y)s i (t) in tmp realtype *tmp = p_python_functions->get_tmp_state_vector(); p_python_functions->jac_action.m_arg[0] = &t; - p_python_functions->jac_action.m_arg[1] = NV_DATA_S(yy); + p_python_functions->jac_action.m_arg[1] = NV_DATA_OMP(yy); p_python_functions->jac_action.m_arg[2] = p_python_functions->inputs.data(); - p_python_functions->jac_action.m_arg[3] = NV_DATA_S(yS[i]); + p_python_functions->jac_action.m_arg[3] = NV_DATA_OMP(yS[i]); p_python_functions->jac_action.m_res[0] = tmp; p_python_functions->jac_action(); const int ns = p_python_functions->number_of_states; - casadi::casadi_axpy(ns, 1., tmp, NV_DATA_S(resvalS[i])); + casadi::casadi_axpy(ns, 1., tmp, NV_DATA_OMP(resvalS[i])); // put -(∂F/∂ ẏ) ṡ i (t) in tmp2 - p_python_functions->mass_action.m_arg[0] = NV_DATA_S(ypS[i]); + p_python_functions->mass_action.m_arg[0] = NV_DATA_OMP(ypS[i]); p_python_functions->mass_action.m_res[0] = tmp; p_python_functions->mass_action(); // (∂F/∂y)s i (t)+(∂F/∂ ẏ) ṡ i (t)+(∂F/∂p i ) // AXPY: y <- a*x + y - casadi::casadi_axpy(ns, -1., tmp, NV_DATA_S(resvalS[i])); + casadi::casadi_axpy(ns, -1., tmp, NV_DATA_OMP(resvalS[i])); } return 0; From 1b9a5cdc3f2bccd84739b31bb7c468234d143622 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Fri, 12 May 2023 13:53:30 +0000 Subject: [PATCH 4/4] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84d68222aa..94d4cabdca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Features +- Enable multithreading in IDAKLU solver ([#2947](https://github.com/pybamm-team/PyBaMM/pull/2947)) - If a solution contains cycles and steps, the cycle number and step number are now saved when `solution.save_data()` is called ([#2931](https://github.com/pybamm-team/PyBaMM/pull/2931)) ## Optimizations