From f40c2cae1aac18882cdf6f93eae6f2c12e213caa Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Fri, 17 May 2024 08:46:17 -0700 Subject: [PATCH 1/4] tbb fix --- cpp/daal/include/services/env_detect.h | 4 ++ .../src/externals/core_threading_win_dll.cpp | 43 +++++++++++++++++-- cpp/daal/src/services/env_detect.cpp | 13 +++++- cpp/daal/src/threading/threading.cpp | 22 ++++++++-- cpp/daal/src/threading/threading.h | 7 +++ 5 files changed, 79 insertions(+), 10 deletions(-) diff --git a/cpp/daal/include/services/env_detect.h b/cpp/daal/include/services/env_detect.h index f561de5ae2c..d132c55794d 100644 --- a/cpp/daal/include/services/env_detect.h +++ b/cpp/daal/include/services/env_detect.h @@ -198,6 +198,10 @@ class DAAL_EXPORT Environment : public Base void initNumberOfThreads(); env _env; + // Pointer to the oneapi::tbb::task_scheduler_handle class object, global for oneDAL. + // The oneapi::tbb::task_scheduler_handle and the oneapi::tbb::finalize function + // allow user to wait for completion of worker threads. + void * _schedulerHandle; void * _globalControl; SharedPtr _executionContext; }; diff --git a/cpp/daal/src/externals/core_threading_win_dll.cpp b/cpp/daal/src/externals/core_threading_win_dll.cpp index bfd7ac01a32..db381db08ad 100644 --- a/cpp/daal/src/externals/core_threading_win_dll.cpp +++ b/cpp/daal/src/externals/core_threading_win_dll.cpp @@ -143,7 +143,9 @@ typedef void (*_daal_wait_task_group_t)(void * taskGroupPtr); typedef bool (*_daal_is_in_parallel_t)(); typedef void (*_daal_tbb_task_scheduler_free_t)(void *& globalControl); +typedef void (*_daal_tbb_task_scheduler_handle_free_t)(void *& schedulerHandle); typedef size_t (*_setNumberOfThreads_t)(const size_t, void **); +typedef size_t (*_setSchedulerHandle_t)(void **); typedef void * (*_daal_threader_env_t)(); typedef void (*_daal_parallel_sort_int32_t)(int *, int *); @@ -205,10 +207,12 @@ static _daal_del_task_group_t _daal_del_task_group_ptr = NULL; static _daal_run_task_group_t _daal_run_task_group_ptr = NULL; static _daal_wait_task_group_t _daal_wait_task_group_ptr = NULL; -static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; -static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; -static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; -static _daal_threader_env_t _daal_threader_env_ptr = NULL; +static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; +static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; +static _daal_tbb_task_scheduler_handle_free_t _daal_tbb_task_scheduler_handle_free_ptr = NULL; +static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; +static _setSchedulerHandle_t _setSchedulerHandle_ptr = NULL; +static _daal_threader_env_t _daal_threader_env_ptr = NULL; static _daal_parallel_sort_int32_t _daal_parallel_sort_int32_ptr = NULL; static _daal_parallel_sort_uint64_t _daal_parallel_sort_uint64_ptr = NULL; @@ -657,6 +661,27 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& init) return _daal_tbb_task_scheduler_free_ptr(init); } +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& init) +{ + if (init == NULL) + { + // If threading library was not opened, there is nothing to free, + // so we do not need to load threading library. + // Moreover, loading threading library in the Environment destructor + // results in a crush because of the use of Wintrust library after it was unloaded. + // This happens due to undefined order of static objects deinitialization + // like Environment, and dependent libraries. + return; + } + + load_daal_thr_dll(); + if (_daal_tbb_task_scheduler_handle_free_ptr == NULL) + { + _daal_tbb_task_scheduler_handle_free_ptr = (_daal_tbb_task_scheduler_handle_free_t)load_daal_thr_func("_daal_tbb_task_scheduler_handle_free"); + } + return _daal_tbb_task_scheduler_handle_free_ptr(init); +} + DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) { load_daal_thr_dll(); @@ -667,6 +692,16 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) return _setNumberOfThreads_ptr(numThreads, init); } +DAAL_EXPORT size_t _setSchedulerHandle(void ** init) +{ + load_daal_thr_dll(); + if (_setSchedulerHandle_ptr == NULL) + { + _setSchedulerHandle_ptr = (_setSchedulerHandle_t)load_daal_thr_func("_setSchedulerHandle"); + } + return _setSchedulerHandle_ptr(init); +} + DAAL_EXPORT void * _daal_threader_env() { load_daal_thr_dll(); diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 6698ede0d3a..8a4d43d8ac5 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -125,8 +125,14 @@ DAAL_EXPORT void daal::services::Environment::setDynamicLibraryThreadingTypeOnWi initNumberOfThreads(); } -DAAL_EXPORT daal::services::Environment::Environment() : _globalControl {} -{ +DAAL_EXPORT daal::services::Environment::Environment() : _schedulerHandle {}, _globalControl {} +{ + // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected + // destruction of the calling thread. + // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle + // instance, it blocks the calling thread until the completion of all worker + // threads that were implicitly created by the library. + daal::setSchedulerHandle(&_schedulerHandle); _env.cpuid_init_flag = false; _env.cpuid = -1; this->setDefaultExecutionContext(internal::CpuExecutionContext()); @@ -137,6 +143,7 @@ DAAL_EXPORT daal::services::Environment::Environment(const Environment & e) : da DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() { if (isInit) return; + daal::setSchedulerHandle(&_schedulerHandle); /* if HT enabled - set _numThreads to physical cores num */ if (daal::internal::ServiceInst::serv_get_ht()) @@ -157,6 +164,7 @@ DAAL_EXPORT daal::services::Environment::~Environment() { daal::services::daal_free_buffers(); _daal_tbb_task_scheduler_free(_globalControl); + _daal_tbb_task_scheduler_handle_free(_schedulerHandle); } void daal::services::Environment::_cpu_detect(int enable) @@ -171,6 +179,7 @@ void daal::services::Environment::_cpu_detect(int enable) DAAL_EXPORT void daal::services::Environment::setNumberOfThreads(const size_t numThreads) { isInit = true; + daal::setSchedulerHandle(&_schedulerHandle); daal::setNumberOfThreads(numThreads, &_globalControl); } diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index 7fa0127a5ab..cc1bff804c0 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -66,18 +66,24 @@ DAAL_EXPORT void _threaded_scalable_free(void * ptr) DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) { -#if defined(__DO_TBB_LAYER__) if (globalControl) { delete reinterpret_cast(globalControl); globalControl = nullptr; } -#endif +} + +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle) +{ + if (schedulerHandle) + { + delete reinterpret_cast(schedulerHandle); + schedulerHandle = nullptr; + } } DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl) { -#if defined(__DO_TBB_LAYER__) static tbb::spin_mutex mt; tbb::spin_mutex::scoped_lock lock(mt); if (numThreads != 0) @@ -87,11 +93,19 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalCo daal::threader_env()->setNumberOfThreads(numThreads); return numThreads; } -#endif + daal::threader_env()->setNumberOfThreads(1); return 1; } +DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) +{ + *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); + // It is necessary for initializing tbb in cases where DAAL does not use it. + tbb::task_arena {}.initialize(); + return 0; +} + DAAL_EXPORT void _daal_threader_for(int n, int threads_request, const void * a, daal::functype func) { #if defined(__DO_TBB_LAYER__) diff --git a/cpp/daal/src/threading/threading.h b/cpp/daal/src/threading/threading.h index 4d00c789494..0b4a9881b97 100644 --- a/cpp/daal/src/threading/threading.h +++ b/cpp/daal/src/threading/threading.h @@ -102,7 +102,9 @@ extern "C" DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr); DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl); + DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle); DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl); + DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle); DAAL_EXPORT void * _daal_threader_env(); @@ -183,6 +185,11 @@ inline size_t threader_get_threads_number() return threader_env()->getNumberOfThreads(); } +inline size_t setSchedulerHandle(void ** schedulerHandle) +{ + return _setSchedulerHandle(schedulerHandle); +} + inline size_t setNumberOfThreads(const size_t numThreads, void ** globalControl) { return _setNumberOfThreads(numThreads, globalControl); From f3e199ee889bc79c58765c9c642decaeca6b9e2f Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 20 May 2024 13:17:32 -0700 Subject: [PATCH 2/4] latest commits --- cpp/daal/src/services/env_detect.cpp | 13 +++++-------- cpp/daal/src/threading/service_thread_pinner.cpp | 1 + cpp/daal/src/threading/threading.cpp | 11 +++++++++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 8a4d43d8ac5..61434c51fa3 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -127,12 +127,6 @@ DAAL_EXPORT void daal::services::Environment::setDynamicLibraryThreadingTypeOnWi DAAL_EXPORT daal::services::Environment::Environment() : _schedulerHandle {}, _globalControl {} { - // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected - // destruction of the calling thread. - // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle - // instance, it blocks the calling thread until the completion of all worker - // threads that were implicitly created by the library. - daal::setSchedulerHandle(&_schedulerHandle); _env.cpuid_init_flag = false; _env.cpuid = -1; this->setDefaultExecutionContext(internal::CpuExecutionContext()); @@ -143,6 +137,11 @@ DAAL_EXPORT daal::services::Environment::Environment(const Environment & e) : da DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() { if (isInit) return; + // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected + // destruction of the calling thread. + // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle + // instance, it blocks the calling thread until the completion of all worker + // threads that were implicitly created by the library. daal::setSchedulerHandle(&_schedulerHandle); /* if HT enabled - set _numThreads to physical cores num */ @@ -163,8 +162,6 @@ DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() DAAL_EXPORT daal::services::Environment::~Environment() { daal::services::daal_free_buffers(); - _daal_tbb_task_scheduler_free(_globalControl); - _daal_tbb_task_scheduler_handle_free(_schedulerHandle); } void daal::services::Environment::_cpu_detect(int enable) diff --git a/cpp/daal/src/threading/service_thread_pinner.cpp b/cpp/daal/src/threading/service_thread_pinner.cpp index 069a163c0a5..bb07d78e607 100755 --- a/cpp/daal/src/threading/service_thread_pinner.cpp +++ b/cpp/daal/src/threading/service_thread_pinner.cpp @@ -236,6 +236,7 @@ class thread_pinner_impl_t : public tbb::task_scheduler_observer thread_pinner_impl_t::thread_pinner_impl_t(void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) : pinner_arena(nthreads = daal::threader_get_threads_number()), tbb::task_scheduler_observer(pinner_arena), topo_deleter(deleter) { + pinner_arena.initialize(); do_pinning = (nthreads > 0) ? true : false; is_pinning.set(0); diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index cc1bff804c0..6fb48530331 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -66,24 +66,29 @@ DAAL_EXPORT void _threaded_scalable_free(void * ptr) DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) { +#if defined(__DO_TBB_LAYER__) if (globalControl) { delete reinterpret_cast(globalControl); globalControl = nullptr; } +#endif } DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle) { +#if defined(__DO_TBB_LAYER__) if (schedulerHandle) { delete reinterpret_cast(schedulerHandle); schedulerHandle = nullptr; } +#endif } DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl) { +#if defined(__DO_TBB_LAYER__) static tbb::spin_mutex mt; tbb::spin_mutex::scoped_lock lock(mt); if (numThreads != 0) @@ -93,16 +98,18 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalCo daal::threader_env()->setNumberOfThreads(numThreads); return numThreads; } - +#endif daal::threader_env()->setNumberOfThreads(1); return 1; } DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) { - *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); +#if defined(__DO_TBB_LAYER__) // It is necessary for initializing tbb in cases where DAAL does not use it. tbb::task_arena {}.initialize(); + *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); +#endif return 0; } From e46c7e2d6ea4b7e9f5a88e73b8cd7d9eb39465a2 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 21 May 2024 05:11:06 -0700 Subject: [PATCH 3/4] fix for arm --- cpp/daal/src/services/env_detect.cpp | 15 +++++++++------ cpp/daal/src/threading/threading.cpp | 8 ++++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 61434c51fa3..286416ed571 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -137,13 +137,14 @@ DAAL_EXPORT daal::services::Environment::Environment(const Environment & e) : da DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() { if (isInit) return; - // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected - // destruction of the calling thread. - // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle - // instance, it blocks the calling thread until the completion of all worker - // threads that were implicitly created by the library. + // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected + // destruction of the calling thread. + // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle + // instance, it blocks the calling thread until the completion of all worker + // threads that were implicitly created by the library. +#if defined(TARGET_X86_64) daal::setSchedulerHandle(&_schedulerHandle); - +#endif /* if HT enabled - set _numThreads to physical cores num */ if (daal::internal::ServiceInst::serv_get_ht()) { @@ -176,7 +177,9 @@ void daal::services::Environment::_cpu_detect(int enable) DAAL_EXPORT void daal::services::Environment::setNumberOfThreads(const size_t numThreads) { isInit = true; +#if defined(TARGET_X86_64) daal::setSchedulerHandle(&_schedulerHandle); +#endif daal::setNumberOfThreads(numThreads, &_globalControl); } diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index 6fb48530331..d4ba88600c5 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -77,12 +77,14 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle) { -#if defined(__DO_TBB_LAYER__) +#if defined(TARGET_X86_64) + #if defined(__DO_TBB_LAYER__) if (schedulerHandle) { delete reinterpret_cast(schedulerHandle); schedulerHandle = nullptr; } + #endif #endif } @@ -105,10 +107,12 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalCo DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) { -#if defined(__DO_TBB_LAYER__) +#if defined(TARGET_X86_64) + #if defined(__DO_TBB_LAYER__) // It is necessary for initializing tbb in cases where DAAL does not use it. tbb::task_arena {}.initialize(); *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); + #endif #endif return 0; } From 5fcc819ff7ffa762cb784113cf229168d0ee19a9 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 21 May 2024 07:29:05 -0700 Subject: [PATCH 4/4] fixes --- cpp/daal/src/externals/core_threading_win_dll.cpp | 11 ----------- cpp/daal/src/threading/service_thread_pinner.cpp | 2 ++ cpp/daal/src/threading/threading.cpp | 2 +- 3 files changed, 3 insertions(+), 12 deletions(-) mode change 100755 => 100644 cpp/daal/src/threading/service_thread_pinner.cpp diff --git a/cpp/daal/src/externals/core_threading_win_dll.cpp b/cpp/daal/src/externals/core_threading_win_dll.cpp index db381db08ad..37c4f7d0e2b 100644 --- a/cpp/daal/src/externals/core_threading_win_dll.cpp +++ b/cpp/daal/src/externals/core_threading_win_dll.cpp @@ -663,17 +663,6 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& init) DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& init) { - if (init == NULL) - { - // If threading library was not opened, there is nothing to free, - // so we do not need to load threading library. - // Moreover, loading threading library in the Environment destructor - // results in a crush because of the use of Wintrust library after it was unloaded. - // This happens due to undefined order of static objects deinitialization - // like Environment, and dependent libraries. - return; - } - load_daal_thr_dll(); if (_daal_tbb_task_scheduler_handle_free_ptr == NULL) { diff --git a/cpp/daal/src/threading/service_thread_pinner.cpp b/cpp/daal/src/threading/service_thread_pinner.cpp old mode 100755 new mode 100644 index bb07d78e607..9cc55190a37 --- a/cpp/daal/src/threading/service_thread_pinner.cpp +++ b/cpp/daal/src/threading/service_thread_pinner.cpp @@ -236,7 +236,9 @@ class thread_pinner_impl_t : public tbb::task_scheduler_observer thread_pinner_impl_t::thread_pinner_impl_t(void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) : pinner_arena(nthreads = daal::threader_get_threads_number()), tbb::task_scheduler_observer(pinner_arena), topo_deleter(deleter) { + #if defined(TARGET_X86_64) pinner_arena.initialize(); + #endif do_pinning = (nthreads > 0) ? true : false; is_pinning.set(0); diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index d4ba88600c5..f1bb8bd6162 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -109,9 +109,9 @@ DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) { #if defined(TARGET_X86_64) #if defined(__DO_TBB_LAYER__) + *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); // It is necessary for initializing tbb in cases where DAAL does not use it. tbb::task_arena {}.initialize(); - *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); #endif #endif return 0;