From 65d93229216acee4df76d0bfd13c8fcba24e2f8f Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 16 Jan 2025 07:57:31 -0800 Subject: [PATCH 01/14] init adding dpl --- WORKSPACE | 15 + cpp/daal/BUILD | 2 + cpp/oneapi/dal/BUILD | 1 + .../dal/backend/primitives/sort/sort_dpc.cpp | 341 ++++-------------- .../backend/primitives/sort/test/sort_dpc.cpp | 4 +- dev/bazel/deps/dpl.bzl | 27 ++ dev/bazel/deps/dpl.tpl.BUILD | 7 + makefile | 9 +- 8 files changed, 139 insertions(+), 267 deletions(-) create mode 100644 dev/bazel/deps/dpl.bzl create mode 100644 dev/bazel/deps/dpl.tpl.BUILD diff --git a/WORKSPACE b/WORKSPACE index ac3b7f57835..0422058e6aa 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -79,6 +79,21 @@ ccl_repo( ] ) +load("@onedal//dev/bazel/deps:dpl.bzl", "dpl_repo") +dpl_repo( + name = "dpl", + root_env_var = "DPL_ROOT", + urls = [ + "https://files.pythonhosted.org/packages/95/f6/18f78cb933e01ecd9e99d37a10da4971a795fcfdd1d24640799b4050fdbb/onedpl_devel-2022.7.1-py2.py3-none-manylinux_2_28_x86_64.whl", + ], + sha256s = [ + "3b270999d2464c5151aa0e7995dda9e896d072c75069ccee1efae9dc56bdc417", + ], + strip_prefixes = [ + "onedpl_devel-2022.7.1.data/data", + ], +) + load("@onedal//dev/bazel/deps:mkl.bzl", "mkl_repo") mkl_repo( name = "mkl", diff --git a/cpp/daal/BUILD b/cpp/daal/BUILD index 3ff445745df..ddd8004fadc 100644 --- a/cpp/daal/BUILD +++ b/cpp/daal/BUILD @@ -58,10 +58,12 @@ daal_module( "@config//:backend_ref": [ ":public_includes", "@openblas//:headers", + "@dpl//:headers", ], "//conditions:default": [ ":public_includes", "@mkl//:headers", + "@dpl//:headers", ], }), ) diff --git a/cpp/oneapi/dal/BUILD b/cpp/oneapi/dal/BUILD index 7a3ba863105..0a3d18bf3fd 100644 --- a/cpp/oneapi/dal/BUILD +++ b/cpp/oneapi/dal/BUILD @@ -31,6 +31,7 @@ dal_module( ], dpc_deps = [ "@mkl//:mkl_dpc", + "@dpl//:headers", ], ) diff --git a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp index a68e4c6a1fb..fcdbe7111fc 100644 --- 
a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp @@ -20,6 +20,8 @@ #include +#include + namespace oneapi::dal::backend::primitives { namespace de = dal::detail; @@ -34,202 +36,6 @@ inline std::uint64_t inv_bits(std::uint64_t x) { using sycl::ext::oneapi::plus; -template -sycl::event radix_sort_indices_inplace::radix_scan(sycl::queue& queue, - const ndview& val, - ndarray& part_hist, - Index elem_count, - std::uint32_t bit_offset, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps) { - ONEDAL_ASSERT(part_hist.get_count() == hist_buff_size_); - - const sycl::nd_range<1> nd_range = - make_multiple_nd_range_1d(de::check_mul_overflow(local_size, local_hist_count), local_size); - - const radix_integer_t* val_ptr = reinterpret_cast(val.get_data()); - Index* part_hist_ptr = part_hist.get_mutable_data(); - - auto event = queue.submit([&](sycl::handler& cgh) { - cgh.depends_on(deps); - cgh.parallel_for(nd_range, [=](sycl::nd_item<1> item) { - auto sbg = item.get_sub_group(); - if (sbg.get_group_id() > 0) { - return; - } - const std::uint32_t n_groups = item.get_group_range(0); - const std::uint32_t n_sub_groups = sbg.get_group_range()[0]; - const std::uint32_t n_total_sub_groups = n_sub_groups * n_groups; - const Index elems_for_sbg = - elem_count / n_total_sub_groups + bool(elem_count % n_total_sub_groups); - const std::uint32_t local_size = sbg.get_local_range()[0]; - - const std::uint32_t local_id = sbg.get_local_id(); - const std::uint32_t sub_group_id = sbg.get_group_id(); - const std::uint32_t group_id = item.get_group(0) * n_sub_groups + sub_group_id; - - Index ind_start = group_id * elems_for_sbg; - Index ind_end = - sycl::min(static_cast((group_id + 1) * elems_for_sbg), elem_count); - - Index offset[radix_range_]; - for (std::uint32_t i = 0; i < radix_range_; i++) { - offset[i] = 0; - } - - for (Index i = ind_start + local_id; i < ind_end; i += local_size) { - radix_integer_t 
data_bits = ((inv_bits(val_ptr[i]) >> bit_offset) & radix_range_1_); - for (std::uint32_t j = 0; j < radix_range_; j++) { - Index value = static_cast(data_bits == j); - Index partial_offset = sycl::reduce_over_group(sbg, value, plus()); - offset[j] += partial_offset; - } - } - - if (local_id == 0) { - for (std::uint32_t j = 0; j < radix_range_; j++) { - part_hist_ptr[group_id * radix_range_ + j] = offset[j]; - } - } - }); - }); - - return event; -} - -template -sycl::event radix_sort_indices_inplace::radix_hist_scan( - sycl::queue& queue, - const ndarray& part_hist, - ndarray& part_prefix_hist, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps) { - ONEDAL_ASSERT(part_hist.get_count() == hist_buff_size_); - ONEDAL_ASSERT(part_prefix_hist.get_count() == hist_buff_size_); - - const Index* part_hist_ptr = part_hist.get_data(); - Index* part_prefix_hist_ptr = part_prefix_hist.get_mutable_data(); - - const sycl::nd_range<1> nd_range = make_multiple_nd_range_1d(local_size, local_size); - - auto event = queue.submit([&](sycl::handler& cgh) { - cgh.depends_on(deps); - cgh.parallel_for(nd_range, [=](sycl::nd_item<1> item) { - auto sbg = item.get_sub_group(); - if (sbg.get_group_id() > 0) { - return; - } - - const std::uint32_t local_size = sbg.get_local_range()[0]; - const std::uint32_t local_id = sbg.get_local_id(); - - Index offset[radix_range_]; - for (std::uint32_t i = 0; i < radix_range_; i++) { - offset[i] = 0; - } - - for (std::uint32_t i = local_id; i < local_hist_count; i += local_size) { - for (std::uint32_t j = 0; j < radix_range_; j++) { - Index value = part_hist_ptr[i * radix_range_ + j]; - Index boundary = sycl::exclusive_scan_over_group(sbg, value, plus()); - part_prefix_hist_ptr[i * radix_range_ + j] = offset[j] + boundary; - Index partial_offset = sycl::reduce_over_group(sbg, value, plus()); - offset[j] += partial_offset; - } - } - - if (local_id == 0) { - Index total_sum = 0; - for (std::uint32_t j = 0; j < radix_range_; j++) { 
- part_prefix_hist_ptr[local_hist_count * radix_range_ + j] = total_sum; - total_sum += offset[j]; - } - } - }); - }); - - return event; -} - -template -sycl::event radix_sort_indices_inplace::radix_reorder( - sycl::queue& queue, - const ndview& val_in, - const ndview& ind_in, - const ndview& part_prefix_hist, - ndview& val_out, - ndview& ind_out, - Index elem_count, - std::uint32_t bit_offset, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps) { - ONEDAL_ASSERT(part_prefix_hist.get_count() == ((local_hist_count + 1) << radix_bits_)); - ONEDAL_ASSERT(val_in.get_count() == ind_in.get_count()); - ONEDAL_ASSERT(val_in.get_count() == val_out.get_count()); - ONEDAL_ASSERT(val_in.get_count() == ind_out.get_count()); - - const radix_integer_t* val_in_ptr = reinterpret_cast(val_in.get_data()); - const Index* ind_in_ptr = ind_in.get_data(); - const Index* part_prefix_hist_ptr = part_prefix_hist.get_data(); - radix_integer_t* val_out_ptr = reinterpret_cast(val_out.get_mutable_data()); - Index* ind_out_ptr = ind_out.get_mutable_data(); - - const sycl::nd_range<1> nd_range = - make_multiple_nd_range_1d(de::check_mul_overflow(local_size, local_hist_count), local_size); - - auto event = queue.submit([&](sycl::handler& cgh) { - cgh.depends_on(deps); - cgh.parallel_for(nd_range, [=](sycl::nd_item<1> item) { - auto sbg = item.get_sub_group(); - if (sbg.get_group_id() > 0) { - return; - } - - const std::uint32_t n_groups = item.get_group_range(0); - const std::uint32_t n_sub_groups = sbg.get_group_range()[0]; - const std::uint32_t n_total_sub_groups = n_sub_groups * n_groups; - const Index elems_for_sbg = - elem_count / n_total_sub_groups + bool(elem_count % n_total_sub_groups); - const std::uint32_t local_size = sbg.get_local_range()[0]; - - const std::uint32_t local_id = sbg.get_local_id(); - const std::uint32_t sub_group_id = sbg.get_group_id(); - const std::uint32_t group_id = item.get_group(0) * n_sub_groups + sub_group_id; - - Index ind_start = 
group_id * elems_for_sbg; - Index ind_end = - sycl::min(static_cast((group_id + 1) * elems_for_sbg), elem_count); - - Index offset[radix_range_]; - - for (std::uint32_t i = 0; i < radix_range_; i++) { - offset[i] = part_prefix_hist_ptr[group_id * radix_range_ + i] + - part_prefix_hist_ptr[n_total_sub_groups * radix_range_ + i]; - } - - for (Index i = ind_start + local_id; i < ind_end; i += local_size) { - radix_integer_t data_value = val_in_ptr[i]; - radix_integer_t data_bits = ((inv_bits(data_value) >> bit_offset) & radix_range_1_); - Index pos_new = 0; - for (std::uint32_t j = 0; j < radix_range_; j++) { - Index value = static_cast(data_bits == j); - Index boundary = sycl::exclusive_scan_over_group(sbg, value, plus()); - pos_new |= value * (offset[j] + boundary); - Index partial_offset = sycl::reduce_over_group(sbg, value, plus()); - offset[j] = offset[j] + partial_offset; - } - val_out_ptr[pos_new] = data_value; - ind_out_ptr[pos_new] = ind_in_ptr[i]; - } - }); - }); - - return event; -} - template radix_sort_indices_inplace::radix_sort_indices_inplace(const sycl::queue& queue) : queue_(queue), @@ -276,75 +82,82 @@ sycl::event radix_sort_indices_inplace::operator()(ndview de::limits::max()) { throw domain_error(dal::detail::error_messages::invalid_number_of_elements_to_sort()); } + auto event = oneapi::dpl::experimental::kt::gpu::esimd::radix_sort_by_key( + queue_, + val_in.get_mutable_data(), + val_in.get_mutable_data() + val_in.get_count(), + ind_in.get_mutable_data(), + dpl::experimental::kt::kernel_param<256, 32>{}); + return event; - sycl::event::wait_and_throw(deps); - sort_event_.wait_and_throw(); - - init(queue_, val_in.get_count()); - - std::uint32_t rev = 1; - - sycl::event res_deps = {}; - for (std::uint32_t bit_offset = 0; bit_offset < byte_range_ * sizeof(Float); - bit_offset += radix_bits_, rev ^= 1) { - if (rev) { - auto scan_deps = radix_scan(queue_, - val_in, - part_hist_, - elem_count_, - bit_offset, - local_size_, - local_hist_count_, - 
res_deps); - auto hist_scan_deps = radix_hist_scan(queue_, - part_hist_, - part_prefix_hist_, - local_size_, - local_hist_count_, - scan_deps); - res_deps = radix_reorder(queue_, - val_in, - ind_in, - part_prefix_hist_, - val_buff_, - ind_buff_, - elem_count_, - bit_offset, - local_size_, - local_hist_count_, - hist_scan_deps); - } - else { - auto scan_deps = radix_scan(queue_, - val_buff_, - part_hist_, - elem_count_, - bit_offset, - local_size_, - local_hist_count_, - res_deps); - auto hist_scan_deps = radix_hist_scan(queue_, - part_hist_, - part_prefix_hist_, - local_size_, - local_hist_count_, - scan_deps); - res_deps = radix_reorder(queue_, - val_buff_, - ind_buff_, - part_prefix_hist_, - val_in, - ind_in, - elem_count_, - bit_offset, - local_size_, - local_hist_count_, - hist_scan_deps); - } - } - - sort_event_ = res_deps; - return res_deps; + // sycl::event::wait_and_throw(deps); + // sort_event_.wait_and_throw(); + + // init(queue_, val_in.get_count()); + + // std::uint32_t rev = 1; + + // sycl::event res_deps = {}; + // for (std::uint32_t bit_offset = 0; bit_offset < byte_range_ * sizeof(Float); + // bit_offset += radix_bits_, rev ^= 1) { + // if (rev) { + // auto scan_deps = radix_scan(queue_, + // val_in, + // part_hist_, + // elem_count_, + // bit_offset, + // local_size_, + // local_hist_count_, + // res_deps); + // auto hist_scan_deps = radix_hist_scan(queue_, + // part_hist_, + // part_prefix_hist_, + // local_size_, + // local_hist_count_, + // scan_deps); + // res_deps = radix_reorder(queue_, + // val_in, + // ind_in, + // part_prefix_hist_, + // val_buff_, + // ind_buff_, + // elem_count_, + // bit_offset, + // local_size_, + // local_hist_count_, + // hist_scan_deps); + // } + // else { + // auto scan_deps = radix_scan(queue_, + // val_buff_, + // part_hist_, + // elem_count_, + // bit_offset, + // local_size_, + // local_hist_count_, + // res_deps); + // auto hist_scan_deps = radix_hist_scan(queue_, + // part_hist_, + // part_prefix_hist_, + // 
local_size_, + // local_hist_count_, + // scan_deps); + // res_deps = radix_reorder(queue_, + // val_buff_, + // ind_buff_, + // part_prefix_hist_, + // val_in, + // ind_in, + // elem_count_, + // bit_offset, + // local_size_, + // local_hist_count_, + // hist_scan_deps); + // } + // } + + // sort_event_ = res_deps; + // return res_deps; } template diff --git a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp index 8875f828975..b8b4afa8fe4 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp @@ -21,6 +21,8 @@ #include "oneapi/dal/test/engine/math.hpp" #include "oneapi/dal/backend/primitives/sort/sort.hpp" +#include + namespace oneapi::dal::backend::primitives::test { namespace te = dal::test::engine; @@ -201,7 +203,7 @@ TEMPLATE_LIST_TEST_M(sort_with_indices_test, sort_indices_types) { SKIP_IF(this->get_policy().is_cpu()); - std::int64_t elem_count = GENERATE_COPY(2, 10000); + std::int64_t elem_count = GENERATE_COPY(100, 1000, 10000, 100000, 10000000); auto [val, ind] = this->allocate_arrays(elem_count); this->fill_uniform(val, -25., 25.); diff --git a/dev/bazel/deps/dpl.bzl b/dev/bazel/deps/dpl.bzl new file mode 100644 index 00000000000..727dafcf62a --- /dev/null +++ b/dev/bazel/deps/dpl.bzl @@ -0,0 +1,27 @@ +#=============================================================================== +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +load("@onedal//dev/bazel:repos.bzl", "repos") + +dpl_repo = repos.prebuilt_libs_repo_rule( + includes = [ + "include", + ], + libs = [ + "lib", + ], + build_template = "@onedal//dev/bazel/deps:dpl.tpl.BUILD", +) \ No newline at end of file diff --git a/dev/bazel/deps/dpl.tpl.BUILD b/dev/bazel/deps/dpl.tpl.BUILD new file mode 100644 index 00000000000..b5d588a37fa --- /dev/null +++ b/dev/bazel/deps/dpl.tpl.BUILD @@ -0,0 +1,7 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "headers", + hdrs = glob(["include/**/**/*"]), + includes = [ "include" ], +) diff --git a/makefile b/makefile index 8ef5d75fe03..4af44c84db9 100644 --- a/makefile +++ b/makefile @@ -314,6 +314,11 @@ ifeq ($(REQPROFILE), yes) VTUNESDK.LIBS_A := $(if $(OS_is_lnx), $(VTUNESDK.libia)/libittnotify.a,) endif + +#=============================== oneDPL folders ====================================== + +ONEDPL.include := $(DPL_ROOT)/include + #=============================================================================== # Release library names #=============================================================================== @@ -447,7 +452,7 @@ CORE.srcdirs := $(CORE.SERV.srcdir) $(CORE.srcdir) \ $(CPPDIR.daal)/src/data_management CORE.incdirs.common := $(RELEASEDIR.include) $(CPPDIR.daal) $(WORKDIR) -CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(VTUNESDK.include) $(TBBDIR.include) +CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(VTUNESDK.include) $(ONEDPL.include) $(TBBDIR.include) CORE.incdirs := $(CORE.incdirs.common) $(CORE.incdirs.thirdp) $(info CORE.incdirs: $(CORE.incdirs)) @@ -565,7 +570,7 @@ PARAMETERS.tmpdir_a.dpc := $(WORKDIR)/parameters_dpc_static PARAMETERS.tmpdir_y.dpc := $(WORKDIR)/parameters_dpc_dynamic ONEAPI.incdirs.common := $(CPPDIR) 
-ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend_oneapi.incdir) $(VTUNESDK.include) $(TBBDIR.include) +ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend_oneapi.incdir) $(VTUNESDK.include) $(ONEDPL.include) $(TBBDIR.include) ONEAPI.incdirs := $(ONEAPI.incdirs.common) $(CORE.incdirs.thirdp) $(ONEAPI.incdirs.thirdp) ONEAPI.dispatcher_cpu = $(WORKDIR)/oneapi/dal/_dal_cpu_dispatcher_gen.hpp From f8028b7263dfd96b98921e17f3adf2d484eb40a3 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 20 Jan 2025 06:51:40 -0800 Subject: [PATCH 02/14] fixes for dpl --- .ci/env/apt.sh | 7 + .ci/pipeline/ci.yml | 8 +- .../backend/gpu/train_feature_type_dpc.cpp | 9 +- .../gpu/empty_cluster_handling_dpc.cpp | 4 +- .../backend/gpu/working_set_selector_dpc.cpp | 7 +- .../dal/backend/primitives/sort/sort.hpp | 134 +----- .../dal/backend/primitives/sort/sort_dpc.cpp | 384 ++++-------------- .../backend/primitives/sort/test/sort_dpc.cpp | 4 +- .../primitives/voting/large_k_uniform_dpc.cpp | 8 +- .../dal/backend/primitives/voting/uniform.hpp | 1 - 10 files changed, 124 insertions(+), 442 deletions(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 449ea1a6893..f3f95a5eb7f 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -43,6 +43,10 @@ function install_mkl { install_tbb } +function install_dpl { + sudo apt-get install -y intel-oneapi-dpl-2022.7.0 +} + function install_clang-format { sudo apt-get install -y clang-format-14 } @@ -108,6 +112,9 @@ elif [ "${component}" == "tbb" ]; then elif [ "${component}" == "mkl" ]; then add_repo install_mkl +elif [ "${component}" == "dpl" ]; then + add_repo + install_dpl elif [ "${component}" == "gnu-cross-compilers" ]; then update install_gnu-cross-compilers "$2" diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index fc63e1f9fed..1012eff6991 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -29,7 +29,7 @@ variables: VM_IMAGE : 'ubuntu-22.04' SYSROOT_OS: 'jammy' WINDOWS_BASEKIT_URL: 
'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe' - WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel' + WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl' resources: repositories: @@ -71,6 +71,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' + - script: | + .ci/env/apt.sh dpl + displayName: 'dpl installation' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh @@ -393,6 +396,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' + - script: | + .ci/env/apt.sh dpl + displayName: 'dpl installation' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp index 7306533ed50..44ef76fd331 100644 --- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp +++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp @@ -35,7 +35,7 @@ inline sycl::event sort_inplace(sycl::queue& queue_, const bk::event_vector& deps = {}) { ONEDAL_ASSERT(src.get_count() > 0); auto src_ind = pr::ndarray::empty(queue_, { src.get_count() }); - return pr::radix_sort_indices_inplace{ queue_ }(src, src_ind, deps); + return pr::radix_sort_indices_inplace(queue_, src, src_ind, deps); } template @@ -429,13 +429,14 @@ sycl::event indexed_features::operator()(const table& tbl, pr::ndarray::empty(queue_, { row_count_ }, sycl::usm::alloc::device); } - pr::radix_sort_indices_inplace sort{ queue_ }; - sycl::event last_event; for (Index i = 0; i < column_count_; i++) { last_event = extract_column(data_nd_, values_nd, indices_nd, i, { last_event }); - last_event = sort(values_nd, indices_nd, { last_event }); + last_event = 
pr::radix_sort_indices_inplace(queue_, + values_nd, + indices_nd, + { last_event }); last_event = compute_bins(values_nd, indices_nd, column_bin_vec_[i], entries_[i], i, { last_event }); } diff --git a/cpp/oneapi/dal/algo/kmeans/backend/gpu/empty_cluster_handling_dpc.cpp b/cpp/oneapi/dal/algo/kmeans/backend/gpu/empty_cluster_handling_dpc.cpp index c553aae498e..e3deb9f43d3 100644 --- a/cpp/oneapi/dal/algo/kmeans/backend/gpu/empty_cluster_handling_dpc.cpp +++ b/cpp/oneapi/dal/algo/kmeans/backend/gpu/empty_cluster_handling_dpc.cpp @@ -56,8 +56,8 @@ static auto fill_candidate_indices_and_distances(sycl::queue& queue, }); }); - pr::radix_sort_indices_inplace radix_sort{ queue }; - auto sort_event = radix_sort(values, indices, { fill_event }); + auto sort_event = + pr::radix_sort_indices_inplace(queue, values, indices, { fill_event }); auto copy_event = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(sort_event); diff --git a/cpp/oneapi/dal/algo/svm/backend/gpu/working_set_selector_dpc.cpp b/cpp/oneapi/dal/algo/svm/backend/gpu/working_set_selector_dpc.cpp index 10d9e3bea4c..eb9bb5a3dce 100644 --- a/cpp/oneapi/dal/algo/svm/backend/gpu/working_set_selector_dpc.cpp +++ b/cpp/oneapi/dal/algo/svm/backend/gpu/working_set_selector_dpc.cpp @@ -197,9 +197,12 @@ sycl::event working_set_selector::sort_f_indices(sycl::queue& q, auto copy_event = dal::backend::copy(q, tmp_sort_ptr, f_ptr, row_count_, deps); auto arange_event = sorted_f_indices_.arange(q); - auto radix_sort = pr::radix_sort_indices_inplace{ q }; + auto radix_sort_event = - radix_sort(tmp_sort_values_, sorted_f_indices_, { copy_event, arange_event }); + pr::radix_sort_indices_inplace(q, + tmp_sort_values_, + sorted_f_indices_, + { copy_event, arange_event }); return radix_sort_event; } diff --git a/cpp/oneapi/dal/backend/primitives/sort/sort.hpp b/cpp/oneapi/dal/backend/primitives/sort/sort.hpp index 47f40b92cdf..a626e4eaa03 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/sort.hpp +++ 
b/cpp/oneapi/dal/backend/primitives/sort/sort.hpp @@ -36,130 +36,24 @@ struct float2uint_map { using integer_t = std::uint64_t; }; -/// @tparam Float Floating-point type used for storing input values -/// @tparam Index Integer type used for storing input indices template -class radix_sort_indices_inplace { - static_assert(std::is_same_v || std::is_same_v); - using radix_integer_t = typename float2uint_map::integer_t; +sycl::event radix_sort_indices_inplace(sycl::queue& queue, + ndview& val, + ndview& ind, + const event_vector& deps = {}); -public: - /// Performs initialization of auxiliary variables and required auxiliary buffers - /// - /// @param[in] queue The queue - /// @param[in] elem_count The number of elements in input vector - radix_sort_indices_inplace(const sycl::queue& queue); - radix_sort_indices_inplace(const radix_sort_indices_inplace&) = delete; - ~radix_sort_indices_inplace(); - radix_sort_indices_inplace& operator=(const radix_sort_indices_inplace&) = delete; - - /// Performs inplace radix sort of input vector and corresponding indices - /// NOTE: auxiliary buffers and variables are reset in case if number of elements in val - /// differs from the number of elements provided in constructor - /// - /// @param[in, out] val The [n] input/output vector of values to sort out - /// @param[in, out] ind The [n] input/output vector of corresponding indices - sycl::event operator()(ndview& val, - ndview& ind, - const event_vector& deps = {}); - -private: - void init(sycl::queue& queue, std::int64_t elem_count); - sycl::event radix_scan(sycl::queue& queue, - const ndview& val, - ndarray& part_hist, - Index elem_count, - std::uint32_t bit_offset, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps); - sycl::event radix_hist_scan(sycl::queue& queue, - const ndarray& part_hist, - ndarray& part_prefix_hist, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps); - sycl::event radix_reorder(sycl::queue& queue, 
- const ndview& val_in, - const ndview& ind_in, - const ndview& part_prefix_hist, - ndview& val_out, - ndview& ind_out, - Index elem_count, - std::uint32_t bit_offset, - std::int64_t local_size, - std::int64_t local_hist_count, - sycl::event& deps); - - sycl::queue queue_; - sycl::event sort_event_; - - ndarray val_buff_; - ndarray ind_buff_; - - ndarray part_hist_; - ndarray part_prefix_hist_; - - std::uint32_t elem_count_; - std::uint32_t local_size_; - std::uint32_t local_hist_count_; - std::uint32_t hist_buff_size_; - - static constexpr inline std::uint32_t radix_bits_ = 4; - static constexpr inline std::uint32_t radix_range_ = (std::uint32_t)1 << radix_bits_; - static constexpr inline std::uint32_t radix_range_1_ = radix_range_ - 1; - - static constexpr inline std::uint32_t byte_range_ = 8; - static constexpr inline std::uint32_t max_local_hist_count_ = 1024; - static constexpr inline std::uint32_t preferable_sbg_size_ = 16; -}; - -/// @tparam Integer Integer type used for storing input values template -class radix_sort { -public: - /// Performs initialization of auxiliary variables and required auxiliary buffers - /// - /// @param[in] queue The queue - /// @param[in] vector_count The number of vectors (rows) in input array - radix_sort(const sycl::queue& queue); - radix_sort(const radix_sort&) = delete; - ~radix_sort(); - radix_sort& operator=(const radix_sort&) = delete; - - /// Performs radix sort of batch of integer input vectors - /// NOTE: only positive values are supported for now. 
- /// Auxiliary buffers and variables are reset in case if number of elements in val - /// differs from the number of elements provided in constructor - /// - /// @param[in] val_in The [n x p] input array of vectors (row major format) to sort out, - /// is also used for temporary data storage - /// @param[out] val_out The [n x p] output array of sorted vectors (row major format) - /// @param[in] sorted_elem_count The number of elements to sort in each vector - /// TODO: Extend interface with strided (not dense) input & output arrays - sycl::event operator()(ndview& val_in, - ndview& val_out, - std::int64_t sorted_elem_count, - const event_vector& deps = {}); - - sycl::event operator()(ndview& val_in, - ndview& val_out, - const event_vector& deps = {}); - -private: - void init(sycl::queue& queue, std::int64_t vector_count); - - sycl::queue queue_; - sycl::event sort_event_; +sycl::event radix_sort(sycl::queue& queue, + ndview& val_in, + ndview& val_out, + std::int64_t sorted_elem_count, + const event_vector& deps = {}); - ndarray buffer_; - - std::uint32_t vector_count_; - - static constexpr inline std::uint32_t preferable_wg_size_ = 32; - static constexpr inline std::uint32_t radix_range_ = 256; - static constexpr inline std::uint32_t radix_count_ = sizeof(Integer); -}; +template +sycl::event radix_sort(sycl::queue& queue, + ndview& val_in, + ndview& val_out, + const event_vector& deps = {}); #endif diff --git a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp index fcdbe7111fc..11069f26580 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp @@ -20,61 +20,25 @@ #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-local-typedef" + #include +#pragma clang diagnostic pop namespace oneapi::dal::backend::primitives { namespace de = dal::detail; -inline 
std::uint32_t inv_bits(std::uint32_t x) { - return x ^ (-(x >> 31) | 0x80000000u); -} - -inline std::uint64_t inv_bits(std::uint64_t x) { - return x ^ (-(x >> 63) | 0x8000000000000000ul); -} - using sycl::ext::oneapi::plus; template -radix_sort_indices_inplace::radix_sort_indices_inplace(const sycl::queue& queue) - : queue_(queue), - elem_count_(0) {} - -template -radix_sort_indices_inplace::~radix_sort_indices_inplace() { - sort_event_.wait_and_throw(); -} - -template -void radix_sort_indices_inplace::init(sycl::queue& queue, std::int64_t elem_count) { - ONEDAL_ASSERT(elem_count > 0); - ONEDAL_ASSERT(elem_count <= de::limits::max()); - - const std::uint32_t uint_elem_count = de::integral_cast(elem_count); - if (elem_count_ != uint_elem_count) { - elem_count_ = uint_elem_count; - local_size_ = preferable_sbg_size_; - local_hist_count_ = de::check_mul_overflow(max_local_hist_count_, local_size_) < elem_count_ - ? max_local_hist_count_ - : (elem_count_ / local_size_) + bool(elem_count_ % local_size_); - - hist_buff_size_ = (local_hist_count_ + 1) << radix_bits_; - - part_hist_ = ndarray::empty(queue, { hist_buff_size_ }, sycl::usm::alloc::device); - part_prefix_hist_ = - ndarray::empty(queue, { hist_buff_size_ }, sycl::usm::alloc::device); - val_buff_ = ndarray::empty(queue_, { elem_count_ }, sycl::usm::alloc::device); - - ind_buff_ = ndarray::empty(queue_, { elem_count_ }, sycl::usm::alloc::device); - } -} - -template -sycl::event radix_sort_indices_inplace::operator()(ndview& val_in, - ndview& ind_in, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(sort.radix_sort_indices_inplace, queue_); +sycl::event radix_sort_indices_inplace(sycl::queue& queue, + ndview& val_in, + ndview& ind_in, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(sort.radix_sort_indices_inplace, queue); ONEDAL_ASSERT(val_in.has_mutable_data()); ONEDAL_ASSERT(ind_in.has_mutable_data()); ONEDAL_ASSERT(val_in.get_count() == ind_in.get_count()); @@ -82,281 +46,89 @@ sycl::event 
radix_sort_indices_inplace::operator()(ndview de::limits::max()) { throw domain_error(dal::detail::error_messages::invalid_number_of_elements_to_sort()); } + auto event = oneapi::dpl::experimental::kt::gpu::esimd::radix_sort_by_key( - queue_, + queue, val_in.get_mutable_data(), val_in.get_mutable_data() + val_in.get_count(), ind_in.get_mutable_data(), dpl::experimental::kt::kernel_param<256, 32>{}); return event; - - // sycl::event::wait_and_throw(deps); - // sort_event_.wait_and_throw(); - - // init(queue_, val_in.get_count()); - - // std::uint32_t rev = 1; - - // sycl::event res_deps = {}; - // for (std::uint32_t bit_offset = 0; bit_offset < byte_range_ * sizeof(Float); - // bit_offset += radix_bits_, rev ^= 1) { - // if (rev) { - // auto scan_deps = radix_scan(queue_, - // val_in, - // part_hist_, - // elem_count_, - // bit_offset, - // local_size_, - // local_hist_count_, - // res_deps); - // auto hist_scan_deps = radix_hist_scan(queue_, - // part_hist_, - // part_prefix_hist_, - // local_size_, - // local_hist_count_, - // scan_deps); - // res_deps = radix_reorder(queue_, - // val_in, - // ind_in, - // part_prefix_hist_, - // val_buff_, - // ind_buff_, - // elem_count_, - // bit_offset, - // local_size_, - // local_hist_count_, - // hist_scan_deps); - // } - // else { - // auto scan_deps = radix_scan(queue_, - // val_buff_, - // part_hist_, - // elem_count_, - // bit_offset, - // local_size_, - // local_hist_count_, - // res_deps); - // auto hist_scan_deps = radix_hist_scan(queue_, - // part_hist_, - // part_prefix_hist_, - // local_size_, - // local_hist_count_, - // scan_deps); - // res_deps = radix_reorder(queue_, - // val_buff_, - // ind_buff_, - // part_prefix_hist_, - // val_in, - // ind_in, - // elem_count_, - // bit_offset, - // local_size_, - // local_hist_count_, - // hist_scan_deps); - // } - // } - - // sort_event_ = res_deps; - // return res_deps; } template -radix_sort::radix_sort(const sycl::queue& queue) : queue_(queue), - vector_count_(0) {} - 
-template -radix_sort::~radix_sort() { - sort_event_.wait_and_throw(); -} - -template -void radix_sort::init(sycl::queue& queue, std::int64_t vector_count) { - ONEDAL_ASSERT(vector_count > 0); - ONEDAL_ASSERT(vector_count <= de::limits::max()); - - const std::uint32_t uint_vector_count = de::integral_cast(vector_count); - if (vector_count_ != uint_vector_count) { - vector_count_ = uint_vector_count; - - buffer_ = ndarray::empty(queue_, - { vector_count_, radix_range_ }, - sycl::usm::alloc::device); +sycl::event radix_sort(sycl::queue& queue, + ndview& val_in, + ndview& val_out, + std::int64_t sorted_elem_count, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(sort.radix_sort, queue); + + const auto row_count = val_in.get_dimension(0); + const auto col_count = val_in.get_dimension(1); + sycl::event radix_sort_event; + + for (std::int64_t row = 0; row < row_count; ++row) { + Integer* row_start_in = val_in.get_mutable_data() + row * col_count; + Integer* row_start_out = val_out.get_mutable_data() + row * col_count; + + const auto row_sorted_elem_count = std::min(sorted_elem_count, col_count); + + radix_sort_event = oneapi::dpl::experimental::kt::gpu::esimd::radix_sort( + queue, + row_start_in, + row_start_in + row_sorted_elem_count, + row_start_out, + dpl::experimental::kt::kernel_param<256, 32>{}); } -} - -template -sycl::event radix_sort::operator()(ndview& val_in, - ndview& val_out, - std::int64_t sorted_elem_count, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(sort.radix_sort, queue_); - // radixBuf should be big enough to accumulate radix_range elements - ONEDAL_ASSERT(val_in.get_dimension(1) > 0); - ONEDAL_ASSERT(sorted_elem_count > 0); - ONEDAL_ASSERT(val_in.get_dimension(0) == val_out.get_dimension(0)); - ONEDAL_ASSERT(val_in.get_dimension(1) == val_out.get_dimension(1)); - ONEDAL_ASSERT(val_out.has_mutable_data()); - - if (val_in.get_dimension(0) > de::limits::max()) { - throw domain_error(dal::detail::error_messages::invalid_range_of_rows()); - } 
- if (val_in.get_dimension(1) > de::limits::max()) { - throw domain_error(dal::detail::error_messages::invalid_range_of_columns()); - } - if (sorted_elem_count > de::limits::max()) { - throw domain_error(dal::detail::error_messages::invalid_number_of_elements_to_sort()); - } - - sort_event_.wait_and_throw(); - init(queue_, val_in.get_dimension(0)); - - const std::uint32_t vector_count = de::integral_cast(val_in.get_dimension(0)); - const std::uint32_t vector_offset = de::integral_cast(val_in.get_dimension(1)); - - const std::uint32_t _sorted_elem_count = de::integral_cast(sorted_elem_count); - - Integer* labels = val_in.get_mutable_data(); - Integer* sorted = val_out.get_mutable_data(); - Integer* radixbuf = buffer_.get_mutable_data(); - - const sycl::nd_range<2> nd_range = - make_multiple_nd_range_2d({ vector_count, preferable_wg_size_ }, - { 1, preferable_wg_size_ }); - - sort_event_ = queue_.submit([&](sycl::handler& cgh) { - cgh.depends_on(deps); - cgh.parallel_for(nd_range, [=](sycl::nd_item<2> item) { - auto sbg = item.get_sub_group(); - // Code is written for a single subgroup. 
It's necessary to adjust the local range if idle subgoups are presented - if (sbg.get_group_id()[0] > 0) { - return; - } - - const std::uint32_t global_id = item.get_global_id()[0]; - const std::uint32_t local_id = item.get_local_id()[1]; - - const std::uint32_t local_size = sbg.get_local_range()[0]; - const std::uint32_t group_aligned_size = - _sorted_elem_count - _sorted_elem_count % local_size; - const std::uint32_t rem = _sorted_elem_count - group_aligned_size; - - Integer* input = &labels[global_id * vector_offset]; - Integer* output = &sorted[global_id * vector_offset]; - Integer* counters = &radixbuf[global_id * radix_range_]; - // Radix sort - for (std::uint32_t i = 0; i < radix_count_; i++) { - std::uint8_t* cinput = reinterpret_cast(input); - for (std::uint32_t j = local_id; j < radix_range_; j += local_size) - counters[j] = 0; - // Count elements in sub group to write once per value - for (std::uint32_t j = local_id; j < group_aligned_size + local_size; - j += local_size) { - bool exists = j < group_aligned_size || local_id < rem; - std::uint8_t c = exists ? cinput[j * radix_count_ + i] : 0; - std::uint32_t entry = 0; - bool entry_found = false; - for (std::uint32_t k = 0; k < local_size; k++) { - bool correct = j < group_aligned_size || k < rem; - std::uint32_t done = sycl::group_broadcast(sbg, correct ? 0 : 1, k); - if (done) - break; - std::uint8_t value = sycl::group_broadcast(sbg, c, k); - if (!entry_found && value == c) { - entry = k; - entry_found = true; - } - Integer count = sycl::reduce_over_group( - sbg, - static_cast(exists && value == c ? 
1 : 0), - plus()); - if (entry_found && entry == local_id && entry == k) { - counters[value] += count; - } - } - sycl::group_barrier(sbg); - } - // Parallel scan on counters to generate offsets in place - Integer offset = 0; - for (std::uint32_t j = local_id; j < radix_range_; j += local_size) { - Integer value = counters[j]; - Integer boundary = sycl::exclusive_scan_over_group(sbg, value, plus()); - counters[j] = offset + boundary; - Integer partial_offset = sycl::reduce_over_group(sbg, value, plus()); - offset += partial_offset; - } - - sycl::group_barrier(sbg); - for (std::uint32_t j = local_id; j < group_aligned_size + local_size; - j += local_size) { - bool exists = j < group_aligned_size || local_id < rem; - std::uint8_t c = exists ? cinput[j * radix_count_ + i] : 0; - Integer local_offset = 0; - std::uint32_t entry = 0; - bool entry_found = false; - - for (std::uint32_t k = 0; k < local_size; k++) { - bool correct = j < group_aligned_size || k < rem; - std::uint32_t done = sycl::group_broadcast(sbg, correct ? 0 : 1, k); - if (done) - break; - std::uint32_t skip = sycl::group_broadcast(sbg, entry_found ? 1 : 0, k); - if (skip) - continue; - std::uint8_t value = sycl::group_broadcast(sbg, c, k); - if (!entry_found && value == c) { - entry = k; - entry_found = true; - } - Integer offset = sycl::exclusive_scan_over_group( - sbg, - static_cast(exists && value == c ? 1 : 0), - plus()); - if (value == c) { - local_offset = offset + counters[value]; - } - Integer count = sycl::reduce_over_group( - sbg, - static_cast(exists && value == c ? 
1 : 0), - plus()); - if (entry_found && entry == local_id && entry == k) { - counters[value] += count; - } - } - sycl::group_barrier(sbg); - if (exists) - output[local_offset] = input[j]; - } - std::swap(input, output); - } - for (std::uint32_t i = local_id; i < _sorted_elem_count; i += local_size) - output[i] = input[i]; - }); - }); - - return sort_event_; + return radix_sort_event; } template -sycl::event radix_sort::operator()(ndview& val_in, - ndview& val_out, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(sort.radix_sort, queue_); - return this->operator()(val_in, val_out, val_in.get_dimension(1), deps); +sycl::event radix_sort(sycl::queue& queue, + ndview& val_in, + ndview& val_out, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(sort.radix_sort, queue); + return radix_sort(queue, val_in, val_out, val_in.get_dimension(1), deps); } -#define INSTANTIATE_SORT_INDICES(F, I) \ - template class ONEDAL_EXPORT radix_sort_indices_inplace; - -#define INSTANTIATE_SORT(I) template class ONEDAL_EXPORT radix_sort; - -INSTANTIATE_SORT_INDICES(float, std::uint32_t) -INSTANTIATE_SORT_INDICES(double, std::uint32_t) -INSTANTIATE_SORT_INDICES(float, std::int32_t) -INSTANTIATE_SORT_INDICES(double, std::int32_t) +#define INSTANTIATE_SORT_INDICES(Float, Index) \ + template ONEDAL_EXPORT sycl::event radix_sort_indices_inplace( \ + sycl::queue&, \ + ndview&, \ + ndview&, \ + const event_vector&); + +#define INSTANTIATE_FLOAT(Index) \ + INSTANTIATE_SORT_INDICES(float, Index) \ + INSTANTIATE_SORT_INDICES(double, Index) + +INSTANTIATE_FLOAT(std::uint32_t) +INSTANTIATE_FLOAT(std::int32_t) + +#define INSTANTIATE_RADIX_SORT(Index) \ + template ONEDAL_EXPORT sycl::event radix_sort(sycl::queue&, \ + ndview&, \ + ndview&, \ + const event_vector&); + +INSTANTIATE_RADIX_SORT(std::int32_t) +INSTANTIATE_RADIX_SORT(std::uint32_t) +INSTANTIATE_RADIX_SORT(std::int64_t) +INSTANTIATE_RADIX_SORT(std::uint64_t) + +#define INSTANTIATE_RADIX_SORT_WITH_COUNT(Index) \ + template ONEDAL_EXPORT 
sycl::event radix_sort(sycl::queue&, \ + ndview&, \ + ndview&, \ + std::int64_t, \ + const event_vector&); + +INSTANTIATE_RADIX_SORT_WITH_COUNT(std::int32_t) +INSTANTIATE_RADIX_SORT_WITH_COUNT(std::uint32_t) +INSTANTIATE_RADIX_SORT_WITH_COUNT(std::int64_t) +INSTANTIATE_RADIX_SORT_WITH_COUNT(std::uint64_t) -INSTANTIATE_SORT(std::int32_t) -INSTANTIATE_SORT(std::uint32_t) -INSTANTIATE_SORT(std::int64_t) -INSTANTIATE_SORT(std::uint64_t) } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp index b8b4afa8fe4..ba4f8af27b3 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp @@ -76,7 +76,7 @@ class sort_with_indices_test : public te::policy_fixture { auto ref = create_reference_on_host(val); INFO("run sort with indices"); - auto event = radix_sort_indices_inplace{ this->get_queue() }(val, ind); + auto event = radix_sort_indices_inplace(this->get_queue(), val, ind); event.wait_and_throw(); check_results(val, ind, ref); @@ -153,7 +153,7 @@ class sort_test : public te::policy_fixture { auto val_out = ndarray::empty(q, { vector_count, elem_count }); INFO("run sort"); - radix_sort{ this->get_queue() }(val, val_out, sorted_elem_count).wait_and_throw(); + radix_sort(this->get_queue(), val, val_out, sorted_elem_count).wait_and_throw(); check_results(val_out, ref, sorted_elem_count); } diff --git a/cpp/oneapi/dal/backend/primitives/voting/large_k_uniform_dpc.cpp b/cpp/oneapi/dal/backend/primitives/voting/large_k_uniform_dpc.cpp index 9a5107ddecd..4d3170d97e4 100644 --- a/cpp/oneapi/dal/backend/primitives/voting/large_k_uniform_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/voting/large_k_uniform_dpc.cpp @@ -32,9 +32,9 @@ large_k_uniform_voting::large_k_uniform_voting(sycl::queue& q, : base_t{ q }, swp_( ndarray::empty(q, { max_block, k_response }, sycl::usm::alloc::device)), - 
out_( - ndarray::empty(q, { max_block, k_response }, sycl::usm::alloc::device)), - sorting_{ q } {} + out_(ndarray::empty(q, + { max_block, k_response }, + sycl::usm::alloc::device)) {} template sycl::event large_k_uniform_voting::select_winner(ndview& results, @@ -85,7 +85,7 @@ sycl::event large_k_uniform_voting::operator()(const ndviewget_queue(), swp_slice, responses, deps); - auto srt_event = sorting_(swp_slice, out_slice, { cpy_event }); + auto srt_event = radix_sort(this->get_queue(), swp_slice, out_slice, { cpy_event }); return select_winner(results, { srt_event }); } diff --git a/cpp/oneapi/dal/backend/primitives/voting/uniform.hpp b/cpp/oneapi/dal/backend/primitives/voting/uniform.hpp index 0edfcdade8c..216ff5defdd 100644 --- a/cpp/oneapi/dal/backend/primitives/voting/uniform.hpp +++ b/cpp/oneapi/dal/backend/primitives/voting/uniform.hpp @@ -56,7 +56,6 @@ class large_k_uniform_voting : public uniform_voting { sycl::event select_winner(ndview& results, const event_vector& deps) const; ndarray swp_, out_; - radix_sort sorting_; }; template From 0b553e8ad3350a947b3f8c212cb47864ace1862a Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 21 Jan 2025 00:24:47 -0800 Subject: [PATCH 03/14] minor fix --- .ci/env/apt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index f3f95a5eb7f..1dcc3b816f2 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -44,7 +44,7 @@ function install_mkl { } function install_dpl { - sudo apt-get install -y intel-oneapi-dpl-2022.7.0 + sudo apt-get install -y intel-oneapi-dpl } function install_clang-format { From 2a91928f9fdbb9d07fe8b5301f0ebbc15d49dbe3 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 21 Jan 2025 02:33:40 -0800 Subject: [PATCH 04/14] minor fix --- .ci/env/apt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 1dcc3b816f2..f37835a45f7 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -44,7 +44,7 @@ 
function install_mkl { } function install_dpl { - sudo apt-get install -y intel-oneapi-dpl + sudo apt-get install -y onedpl-devel } function install_clang-format { From ab367c07a7af295faf3d23dbca223a543a2a4382 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 00:55:43 -0800 Subject: [PATCH 05/14] minor fix for dpl from toolkit --- .ci/env/apt.sh | 7 ------- .ci/pipeline/ci.yml | 14 ++++++++------ .ci/scripts/install_basekit.sh | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 13 deletions(-) create mode 100644 .ci/scripts/install_basekit.sh diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index f37835a45f7..449ea1a6893 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -43,10 +43,6 @@ function install_mkl { install_tbb } -function install_dpl { - sudo apt-get install -y onedpl-devel -} - function install_clang-format { sudo apt-get install -y clang-format-14 } @@ -112,9 +108,6 @@ elif [ "${component}" == "tbb" ]; then elif [ "${component}" == "mkl" ]; then add_repo install_mkl -elif [ "${component}" == "dpl" ]; then - add_repo - install_dpl elif [ "${component}" == "gnu-cross-compilers" ]; then update install_gnu-cross-compilers "$2" diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 1012eff6991..0a23b67ebc5 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -30,6 +30,8 @@ variables: SYSROOT_OS: 'jammy' WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe' WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl' + LINUX_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/dfc4a434-838c-4450-a6fe-2fa903b75aa7/intel-oneapi-base-toolkit-2025.0.1.46_offline.sh' + LINUX_DPCPP_COMPONENTS: 'intel.oneapi.lin.dpl' resources: repositories: @@ -71,9 +73,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' - - 
script: | - .ci/env/apt.sh dpl - displayName: 'dpl installation' + - script: + .ci/scripts/install_basekit.bat $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh @@ -396,9 +398,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' - - script: | - .ci/env/apt.sh dpl - displayName: 'dpl installation' + - script: + .ci/scripts/install_basekit.bat $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh diff --git a/.ci/scripts/install_basekit.sh b/.ci/scripts/install_basekit.sh new file mode 100644 index 00000000000..f363b66f447 --- /dev/null +++ b/.ci/scripts/install_basekit.sh @@ -0,0 +1,34 @@ +#!/bin/bash +#=============================================================================== +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +URL=$1 +COMPONENTS=$2 + +curl --output webimage.sh --url "$URL" --retry 5 --retry-delay 5 +chmod +x webimage.sh +./webimage.sh -x -f webimage_extracted --log extract.log +rm -rf webimage.sh +WEBIMAGE_NAME=$(ls -1 webimage_extracted/) +if [ -z "$COMPONENTS" ]; then + sudo webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --eula=accept --log-dir=. 
+ installer_exit_code=$? +else + sudo webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --components="$COMPONENTS" --eula=accept --log-dir=. + installer_exit_code=$? +fi +rm -rf webimage_extracted +exit $installer_exit_code \ No newline at end of file From e053cdf646cf78ea5cfdd1af0b3292a140c0f36e Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 01:34:19 -0800 Subject: [PATCH 06/14] minor fix for script --- .ci/pipeline/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 0a23b67ebc5..0d4c88638d6 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -74,7 +74,7 @@ jobs: .ci/env/apt.sh mkl displayName: 'mkl installation' - script: - .ci/scripts/install_basekit.bat $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh @@ -399,7 +399,7 @@ jobs: .ci/env/apt.sh mkl displayName: 'mkl installation' - script: - .ci/scripts/install_basekit.bat $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh From 3f1a6fe8108374bb54084e50ff9f4b3d76308402 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 04:16:08 -0800 Subject: [PATCH 07/14] minor fixes --- .ci/scripts/install_basekit.sh | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/.ci/scripts/install_basekit.sh b/.ci/scripts/install_basekit.sh index f363b66f447..8782192316a 100644 --- a/.ci/scripts/install_basekit.sh +++ b/.ci/scripts/install_basekit.sh @@ -18,17 +18,20 @@ URL=$1 COMPONENTS=$2 -curl --output webimage.sh --url "$URL" --retry 5 --retry-delay 5 -chmod +x webimage.sh -./webimage.sh -x -f webimage_extracted --log 
extract.log -rm -rf webimage.sh -WEBIMAGE_NAME=$(ls -1 webimage_extracted/) +# Download the installation script +curl --output installer.sh --url "$URL" --retry 5 --retry-delay 5 +chmod +x installer.sh + +# Define default components if none are provided if [ -z "$COMPONENTS" ]; then - sudo webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --eula=accept --log-dir=. - installer_exit_code=$? -else - sudo webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --components="$COMPONENTS" --eula=accept --log-dir=. - installer_exit_code=$? + COMPONENTS="intel.oneapi.lin.dpl" fi -rm -rf webimage_extracted -exit $installer_exit_code \ No newline at end of file + +# Execute the installation script +sudo sh installer.sh -a --silent --eula accept --components "$COMPONENTS" +installer_exit_code=$? + +# Clean up +rm -f installer.sh + +exit $installer_exit_code From 700cd10900c3cc25d44b36f8011b97261b231b1e Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 04:28:28 -0800 Subject: [PATCH 08/14] minor fix --- .ci/pipeline/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 0d4c88638d6..8e73026749d 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -73,6 +73,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' + - script: + chmod +x .ci/scripts/install_basekit.sh + displayName: 'Make install_basekit.sh executable' - script: .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' @@ -398,6 +401,9 @@ jobs: - script: | .ci/env/apt.sh mkl displayName: 'mkl installation' + - script: + chmod +x .ci/scripts/install_basekit.sh + displayName: 'Make install_basekit.sh executable' - script: .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' From 809760f82f1daf6b750655bb20b492a7953eee3b Mon Sep 17 00:00:00 2001 From: 
Alexandr-Solovev Date: Wed, 22 Jan 2025 04:53:10 -0800 Subject: [PATCH 09/14] minor fix --- .ci/pipeline/ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 8e73026749d..1bbfc55ff0f 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -30,8 +30,7 @@ variables: SYSROOT_OS: 'jammy' WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe' WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl' - LINUX_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/dfc4a434-838c-4450-a6fe-2fa903b75aa7/intel-oneapi-base-toolkit-2025.0.1.46_offline.sh' - LINUX_DPCPP_COMPONENTS: 'intel.oneapi.lin.dpl' + DPL_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/37b4aa24-9575-4e7e-91fc-56207fd7cc9c/intel-onedpl-2022.7.1.16_offline.exe' resources: repositories: @@ -77,7 +76,7 @@ jobs: chmod +x .ci/scripts/install_basekit.sh displayName: 'Make install_basekit.sh executable' - script: - .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(DPL_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh @@ -405,7 +404,7 @@ jobs: chmod +x .ci/scripts/install_basekit.sh displayName: 'Make install_basekit.sh executable' - script: - .ci/scripts/install_basekit.sh $(LINUX_BASEKIT_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(DPL_URL) $(LINUX_DPCPP_COMPONENTS) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh From d01ea31932d7b2ff7263c174aa4467ee3f9f8770 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 05:01:51 -0800 Subject: [PATCH 10/14] minor fix for dpl --- .ci/pipeline/ci.yml | 4 ++-- .ci/scripts/install_basekit.sh 
| 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 1bbfc55ff0f..f5eca3b806a 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -76,7 +76,7 @@ jobs: chmod +x .ci/scripts/install_basekit.sh displayName: 'Make install_basekit.sh executable' - script: - .ci/scripts/install_basekit.sh $(DPL_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(DPL_URL) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh @@ -404,7 +404,7 @@ jobs: chmod +x .ci/scripts/install_basekit.sh displayName: 'Make install_basekit.sh executable' - script: - .ci/scripts/install_basekit.sh $(DPL_URL) $(LINUX_DPCPP_COMPONENTS) + .ci/scripts/install_basekit.sh $(DPL_URL) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh diff --git a/.ci/scripts/install_basekit.sh b/.ci/scripts/install_basekit.sh index 8782192316a..76af84b6f14 100644 --- a/.ci/scripts/install_basekit.sh +++ b/.ci/scripts/install_basekit.sh @@ -16,19 +16,15 @@ #=============================================================================== URL=$1 -COMPONENTS=$2 + # Download the installation script curl --output installer.sh --url "$URL" --retry 5 --retry-delay 5 chmod +x installer.sh -# Define default components if none are provided -if [ -z "$COMPONENTS" ]; then - COMPONENTS="intel.oneapi.lin.dpl" -fi # Execute the installation script -sudo sh installer.sh -a --silent --eula accept --components "$COMPONENTS" +sudo sh installer.sh -a --silent --eula accept installer_exit_code=$? 
# Clean up From 064bb12f5ca0a0e9dbad99d5d42d4f9c3790fa40 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 05:07:47 -0800 Subject: [PATCH 11/14] fix correct link --- .ci/pipeline/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index f5eca3b806a..f5c734a09d9 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -30,7 +30,7 @@ variables: SYSROOT_OS: 'jammy' WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe' WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl' - DPL_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/37b4aa24-9575-4e7e-91fc-56207fd7cc9c/intel-onedpl-2022.7.1.16_offline.exe' + DPL_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/de3c613f-829c-4bdc-aa2b-6129eece3bd9/intel-onedpl-2022.7.1.15_offline.sh' resources: repositories: From 6e3587d8eaece0bcfc9fd7cb223b9f45cfdd127c Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 22 Jan 2025 12:15:42 -0800 Subject: [PATCH 12/14] minor fixes --- .ci/pipeline/ci.yml | 12 +++--------- .ci/scripts/{install_basekit.sh => install_dpl.sh} | 4 +--- cpp/daal/BUILD | 2 -- cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp | 1 + .../dal/backend/primitives/sort/test/sort_dpc.cpp | 2 +- dev/bazel/deps/dpl.bzl | 4 ++-- makefile | 1 - 7 files changed, 8 insertions(+), 18 deletions(-) rename .ci/scripts/{install_basekit.sh => install_dpl.sh} (95%) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index f5c734a09d9..43bfe17fd09 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -30,7 +30,7 @@ variables: SYSROOT_OS: 'jammy' WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe' 
WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl' - DPL_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/de3c613f-829c-4bdc-aa2b-6129eece3bd9/intel-onedpl-2022.7.1.15_offline.sh' + LINUX_DPL_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/de3c613f-829c-4bdc-aa2b-6129eece3bd9/intel-onedpl-2022.7.1.15_offline.sh' resources: repositories: @@ -73,10 +73,7 @@ jobs: .ci/env/apt.sh mkl displayName: 'mkl installation' - script: - chmod +x .ci/scripts/install_basekit.sh - displayName: 'Make install_basekit.sh executable' - - script: - .ci/scripts/install_basekit.sh $(DPL_URL) + chmod +x .ci/scripts/install_dpl.sh && .ci/scripts/install_dpl.sh $(LINUX_DPL_URL) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh @@ -401,10 +398,7 @@ jobs: .ci/env/apt.sh mkl displayName: 'mkl installation' - script: - chmod +x .ci/scripts/install_basekit.sh - displayName: 'Make install_basekit.sh executable' - - script: - .ci/scripts/install_basekit.sh $(DPL_URL) + chmod +x .ci/scripts/install_dpl.sh && .ci/scripts/install_dpl.sh $(LINUX_DPL_URL) displayName: 'Install oneAPI Base Toolkit' - script: | source /opt/intel/oneapi/setvars.sh diff --git a/.ci/scripts/install_basekit.sh b/.ci/scripts/install_dpl.sh similarity index 95% rename from .ci/scripts/install_basekit.sh rename to .ci/scripts/install_dpl.sh index 76af84b6f14..92ed60a202b 100644 --- a/.ci/scripts/install_basekit.sh +++ b/.ci/scripts/install_dpl.sh @@ -1,6 +1,6 @@ #!/bin/bash #=============================================================================== -# Copyright 2025 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,12 +17,10 @@ URL=$1 - # Download the installation script curl --output installer.sh --url "$URL" --retry 5 --retry-delay 5 chmod +x installer.sh - # Execute the installation script sudo sh installer.sh -a --silent --eula accept installer_exit_code=$? diff --git a/cpp/daal/BUILD b/cpp/daal/BUILD index ddd8004fadc..3ff445745df 100644 --- a/cpp/daal/BUILD +++ b/cpp/daal/BUILD @@ -58,12 +58,10 @@ daal_module( "@config//:backend_ref": [ ":public_includes", "@openblas//:headers", - "@dpl//:headers", ], "//conditions:default": [ ":public_includes", "@mkl//:headers", - "@dpl//:headers", ], }), ) diff --git a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp index 11069f26580..758e52cb268 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/sort_dpc.cpp @@ -25,6 +25,7 @@ #pragma clang diagnostic ignored "-Wunused-local-typedef" #include + #pragma clang diagnostic pop namespace oneapi::dal::backend::primitives { diff --git a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp index ba4f8af27b3..93ae9fee9c0 100644 --- a/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/sort/test/sort_dpc.cpp @@ -203,7 +203,7 @@ TEMPLATE_LIST_TEST_M(sort_with_indices_test, sort_indices_types) { SKIP_IF(this->get_policy().is_cpu()); - std::int64_t elem_count = GENERATE_COPY(100, 1000, 10000, 100000, 10000000); + std::int64_t elem_count = GENERATE_COPY(100, 1000, 10000, 100000); auto [val, ind] = this->allocate_arrays(elem_count); this->fill_uniform(val, -25., 25.); diff --git a/dev/bazel/deps/dpl.bzl b/dev/bazel/deps/dpl.bzl index 727dafcf62a..93ee00b4c03 100644 --- a/dev/bazel/deps/dpl.bzl +++ b/dev/bazel/deps/dpl.bzl @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2025 Intel Corporation +# Copyright 
contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,4 +24,4 @@ dpl_repo = repos.prebuilt_libs_repo_rule( "lib", ], build_template = "@onedal//dev/bazel/deps:dpl.tpl.BUILD", -) \ No newline at end of file +) diff --git a/makefile b/makefile index 4af44c84db9..4bbedca0246 100644 --- a/makefile +++ b/makefile @@ -314,7 +314,6 @@ ifeq ($(REQPROFILE), yes) VTUNESDK.LIBS_A := $(if $(OS_is_lnx), $(VTUNESDK.libia)/libittnotify.a,) endif - #=============================== oneDPL folders ====================================== ONEDPL.include := $(DPL_ROOT)/include From a60eb07b1fc6b0a49f07452233654d8734f4091d Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 23 Jan 2025 01:17:46 -0800 Subject: [PATCH 13/14] minor fix --- .ci/pipeline/ci.yml | 4 ++-- INSTALL.md | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 43bfe17fd09..eecc750fbfa 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -74,7 +74,7 @@ jobs: displayName: 'mkl installation' - script: chmod +x .ci/scripts/install_dpl.sh && .ci/scripts/install_dpl.sh $(LINUX_DPL_URL) - displayName: 'Install oneAPI Base Toolkit' + displayName: 'oneDPL installation' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh @@ -399,7 +399,7 @@ jobs: displayName: 'mkl installation' - script: chmod +x .ci/scripts/install_dpl.sh && .ci/scripts/install_dpl.sh $(LINUX_DPL_URL) - displayName: 'Install oneAPI Base Toolkit' + displayName: ''oneDPL installation'' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh diff --git a/INSTALL.md b/INSTALL.md index dab844272bd..719161b2b24 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -23,6 +23,7 @@ Required Software: * BLAS and LAPACK libraries - both provided by oneMKL * Python version 3.9 or higher * TBB library (repository 
contains script to download it) +* oneDPL library * Microsoft Visual Studio\* (Windows\* only) * [MSYS2](http://msys2.github.io) (Windows\* only) * `make` and `dos2unix` tools; install these packages using MSYS2 on Windows\* as follows: @@ -113,9 +114,25 @@ is available as an alternative to the manual setup. ./dev/download_tbb.sh -6. Download and install Python (version 3.9 or higher). +6. Set up Intel(R) oneAPI DPC++ Library (Intel(R) oneDPL): -7. Build oneDAL via command-line interface. Choose the appropriate commands based on the interface, platform, and the compiler you use. Interface and platform are required arguments of makefile while others are optional. Below you can find the set of examples for building oneDAL. You may use a combination of them to get the desired build configuration: + _Note: if you used the general oneAPI setvars script from a Base Toolkit installation, this step will not be necessary as oneDPL will already have been set up._ + + Download and install [Intel(R) oneDPL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-library.html). + Set the environment variables for Intel(R) oneDPL. For example: + + - oneDPL (Windows\*): + + call "C:\Program Files (x86)\Intel\oneAPI\dpl\latest\env\vars.bat" intel64 + + - oneDPL (Linux\*): + + source /opt/intel/oneapi/dpl/latest/env/vars.sh intel64 + + +7. Download and install Python (version 3.9 or higher). + +8. Build oneDAL via command-line interface. Choose the appropriate commands based on the interface, platform, and the compiler you use. Interface and platform are required arguments of makefile while others are optional. Below you can find the set of examples for building oneDAL.
You may use a combination of them to get the desired build configuration: - DAAL interfaces on **Linux\*** using **Intel(R) C++ Compiler**: From 16c8f6cde6aebe70a5d82acfb75ef702defb7367 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 23 Jan 2025 02:26:51 -0800 Subject: [PATCH 14/14] minor fix --- .ci/pipeline/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index eecc750fbfa..6a248ea04a3 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -399,7 +399,7 @@ jobs: displayName: 'mkl installation' - script: chmod +x .ci/scripts/install_dpl.sh && .ci/scripts/install_dpl.sh $(LINUX_DPL_URL) - displayName: ''oneDPL installation'' + displayName: 'oneDPL installation' - script: | source /opt/intel/oneapi/setvars.sh .ci/scripts/describe_system.sh