From f92e47e95b13a240e37caf7b36577983544f98fc Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Thu, 10 Feb 2022 19:17:08 -0800 Subject: [PATCH] Remove onnxruntime_util dependency on onnxruntime_framework (#10512) There's a circular dependency between onnxruntime_util and onnxruntime_framework. Remove onnxruntime_util's dependency on onnxruntime_framework. --- cmake/onnxruntime.cmake | 6 +++-- cmake/onnxruntime_framework.cmake | 6 ++++- cmake/onnxruntime_util.cmake | 6 ----- .../contrib_ops/cpu/element_wise_ops.cc | 4 +++- onnxruntime/core/framework/math.h | 23 +++++++++++++++++++ .../framework/orttraining_partial_executor.cc | 4 ++-- .../core/framework/sequential_executor.cc | 4 ++-- .../providers/cpu/math/element_wise_ops.cc | 4 +++- onnxruntime/core/providers/cpu/math/sign.cc | 2 +- .../core/providers/cpu/math/softmax_shared.cc | 5 +++- onnxruntime/core/providers/cpu/nn/shrink.cc | 2 +- .../core/providers/cpu/rnn/rnn_helpers.h | 1 + .../core/providers/cpu/tensor/isinf.cc | 2 +- .../core/providers/cpu/tensor/isnan.cc | 5 ++-- .../cuda/nvtx_profile_context.h} | 0 onnxruntime/core/util/distance.h | 2 +- onnxruntime/core/util/math.h | 6 ++--- onnxruntime/core/util/math_cpu.cc | 14 ++++++----- onnxruntime/core/util/math_cpuonly.h | 12 ---------- onnxruntime/core/util/thread_utils.cc | 6 ++++- .../test/common/tensor_op_test_utils.h | 3 +++ .../core/graph/gradient_builder_base.h | 3 ++- .../core/session/training_session.cc | 2 +- .../models/runner/training_runner.cc | 2 +- .../training_ops/cpu/math/scale.cc | 2 +- .../training_ops/cpu/nn/dropout_7.cc | 2 +- .../training_ops/cpu/op_gradients.cc | 2 +- .../cuda/communication/nccl_service.cc | 2 +- .../training_ops/cuda/communication/recv.cc | 2 +- .../training_ops/cuda/communication/send.cc | 4 ++-- .../training_ops/cuda/controlflow/record.cc | 4 ++-- .../training_ops/cuda/controlflow/wait.cc | 4 ++-- 32 files changed, 88 insertions(+), 58 deletions(-) create mode 100644 onnxruntime/core/framework/math.h rename onnxruntime/core/{profile/context.h => providers/cuda/nvtx_profile_context.h} (100%) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 5f2312216493e..6fe4c6497a9ca 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -7,7 +7,7 @@ if(UNIX) set(OUTPUT_STYLE xcode) else() set(OUTPUT_STYLE gcc) - endif() + endif() else() set(SYMBOL_FILE ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_dll.def) set(OUTPUT_STYLE vc) @@ -157,6 +157,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android" AND onnxruntime_BUILD_JAVA) endforeach() endif() +# This list is a reversed topological ordering of library dependencies. +# Earlier entries may depend on later ones. Later ones should not depend on earlier ones. set(onnxruntime_INTERNAL_LIBRARIES onnxruntime_session ${onnxruntime_libs} @@ -174,10 +176,10 @@ set(onnxruntime_INTERNAL_LIBRARIES ${onnxruntime_winml} onnxruntime_optimizer onnxruntime_providers - onnxruntime_util ${onnxruntime_tvm_libs} onnxruntime_framework onnxruntime_graph + onnxruntime_util ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common onnxruntime_flatbuffers diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index 82f1e75dbc3c1..5b6681fd20daa 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -53,7 +53,7 @@ if (onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS) onnxruntime_add_include_to_target(onnxruntime_framework Python::Module) target_include_directories(onnxruntime_framework PRIVATE ${PROJECT_SOURCE_DIR}/external/dlpack/include) endif() - if (onnxruntime_USE_NCCL OR onnxruntime_USE_MPI) + if (onnxruntime_USE_NCCL OR onnxruntime_USE_MPI) target_include_directories(onnxruntime_framework PUBLIC ${MPI_CXX_INCLUDE_DIRS}) endif() endif() @@ -95,4 +95,8 @@ if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB) target_compile_definitions(onnxruntime_framework PRIVATE DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB) endif() +if (WIN32) + target_compile_definitions(onnxruntime_framework PRIVATE _SCL_SECURE_NO_WARNINGS) +endif() + install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core) diff --git a/cmake/onnxruntime_util.cmake b/cmake/onnxruntime_util.cmake index 8449cf2e0f4e9..d25bd386ec9f2 100644 --- a/cmake/onnxruntime_util.cmake +++ b/cmake/onnxruntime_util.cmake @@ -4,17 +4,12 @@ file(GLOB_RECURSE onnxruntime_util_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/util/*.h" "${ONNXRUNTIME_ROOT}/core/util/*.cc" - "${ONNXRUNTIME_ROOT}/core/profile/*.h" - "${ONNXRUNTIME_ROOT}/core/profile/*.cc" ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_util_srcs}) onnxruntime_add_static_library(onnxruntime_util ${onnxruntime_util_srcs}) target_include_directories(onnxruntime_util PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${eigen_INCLUDE_DIRS}) -if (onnxruntime_USE_CUDA) - target_include_directories(onnxruntime_util PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) -endif() onnxruntime_add_include_to_target(onnxruntime_util onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB}) if(UNIX) target_compile_options(onnxruntime_util PUBLIC "-Wno-error=comment") @@ -24,5 +19,4 @@ set_target_properties(onnxruntime_util PROPERTIES FOLDER "ONNXRuntime") add_dependencies(onnxruntime_util ${onnxruntime_EXTERNAL_DEPENDENCIES}) if (WIN32) target_compile_definitions(onnxruntime_util PRIVATE _SCL_SECURE_NO_WARNINGS) - target_compile_definitions(onnxruntime_framework PRIVATE _SCL_SECURE_NO_WARNINGS) endif() diff --git a/onnxruntime/contrib_ops/cpu/element_wise_ops.cc b/onnxruntime/contrib_ops/cpu/element_wise_ops.cc index ab14ce0c45119..633748c0b445b 100644 --- a/onnxruntime/contrib_ops/cpu/element_wise_ops.cc +++ b/onnxruntime/contrib_ops/cpu/element_wise_ops.cc @@ -1,7 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "element_wise_ops.h" +#include "contrib_ops/cpu/element_wise_ops.h" + +#include "core/framework/math.h" #include "core/providers/cpu/math/element_wise_ops.h" namespace onnxruntime { diff --git a/onnxruntime/core/framework/math.h b/onnxruntime/core/framework/math.h new file mode 100644 index 0000000000000..609c2de118d63 --- /dev/null +++ b/onnxruntime/core/framework/math.h @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include + +#include "core/framework/tensor.h" +#include "core/util/math_cpuonly.h" + +namespace onnxruntime { + +template +auto EigenMap(Tensor& t) -> EigenVectorMap { + return EigenVectorMap(t.template MutableData(), gsl::narrow(t.Shape().Size())); +} + +template +auto EigenMap(const Tensor& t) -> ConstEigenVectorMap { + return ConstEigenVectorMap(t.template Data(), gsl::narrow(t.Shape().Size())); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/framework/orttraining_partial_executor.cc b/onnxruntime/core/framework/orttraining_partial_executor.cc index bfec6d6703574..aee060e2c403b 100644 --- a/onnxruntime/core/framework/orttraining_partial_executor.cc +++ b/onnxruntime/core/framework/orttraining_partial_executor.cc @@ -22,7 +22,7 @@ #ifdef ENABLE_NVTX_PROFILE // This header is for profile using Nvidia's visual profilier. #include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile_context.h" #endif // #define TRACE_EXECUTION @@ -292,7 +292,7 @@ Status PartialExecutor::Execute(const SessionState& session_state, const std::ve } } #ifdef DEBUG_NODE_INPUTS_OUTPUTS - dump_context.program_counter = program_counter; + dump_context.program_counter = program_counter; utils::DumpNodeInputs(dump_context, op_kernel_context, p_op_kernel->Node(), session_state); #endif diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index 2391c2ab57aba..e100fc2aa524d 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -21,8 +21,8 @@ #ifdef ENABLE_NVTX_PROFILE // This header is for profile using Nvidia's visual profilier. -#include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile.h" +#include "core/providers/cuda/nvtx_profile_context.h" #endif // #define TRACE_EXECUTION diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc index 5224c7fef5735..73978d61a1885 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc @@ -1,8 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/framework/data_types_internal.h" #include "core/providers/cpu/math/element_wise_ops.h" + +#include "core/framework/data_types_internal.h" +#include "core/framework/math.h" #include "core/providers/cpu/tensor/utils.h" #include "core/providers/op_kernel_type_control.h" #include diff --git a/onnxruntime/core/providers/cpu/math/sign.cc b/onnxruntime/core/providers/cpu/math/sign.cc index c7ddda013c904..afeff1073384e 100644 --- a/onnxruntime/core/providers/cpu/math/sign.cc +++ b/onnxruntime/core/providers/cpu/math/sign.cc @@ -8,10 +8,10 @@ #include "core/common/common.h" #include "core/framework/data_types.h" #include "core/framework/element_type_lists.h" +#include "core/framework/math.h" #include "core/framework/op_kernel.h" #include "core/providers/op_kernel_type_control.h" #include "core/util/math.h" -#include "core/util/math_cpuonly.h" using namespace ::onnxruntime::common; using namespace ONNX_NAMESPACE; diff --git a/onnxruntime/core/providers/cpu/math/softmax_shared.cc b/onnxruntime/core/providers/cpu/math/softmax_shared.cc index f6e246e1b4ebd..b8eb8dae0977b 100644 --- a/onnxruntime/core/providers/cpu/math/softmax_shared.cc +++ b/onnxruntime/core/providers/cpu/math/softmax_shared.cc @@ -18,9 +18,12 @@ * limitations under the License. */ +#include "core/providers/cpu/math/softmax_shared.h" + #include #include -#include "core/providers/cpu/math/softmax_shared.h" +#include + #include "core/util/math.h" #include "core/util/math_cpuonly.h" #include "core/mlas/inc/mlas.h" diff --git a/onnxruntime/core/providers/cpu/nn/shrink.cc b/onnxruntime/core/providers/cpu/nn/shrink.cc index 0c336da99e24e..7a18e450b06c4 100644 --- a/onnxruntime/core/providers/cpu/nn/shrink.cc +++ b/onnxruntime/core/providers/cpu/nn/shrink.cc @@ -4,9 +4,9 @@ #include "core/providers/cpu/nn/shrink.h" #include "core/framework/element_type_lists.h" +#include "core/framework/math.h" #include "core/framework/utils.h" #include "core/providers/op_kernel_type_control.h" -#include "core/util/math_cpuonly.h" #include "core/util/math.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h index 5551ad1cad0d5..95a8d87cc7665 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h +++ b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h @@ -10,6 +10,7 @@ #include "core/common/common.h" #include "core/common/logging/logging.h" #include "core/framework/allocator.h" +#include "core/framework/tensor.h" #include "core/util/math.h" #include "core/util/math_cpuonly.h" #include "core/util/qmath.h" diff --git a/onnxruntime/core/providers/cpu/tensor/isinf.cc b/onnxruntime/core/providers/cpu/tensor/isinf.cc index 782cfeb9bee92..9b455ac7a9587 100644 --- a/onnxruntime/core/providers/cpu/tensor/isinf.cc +++ b/onnxruntime/core/providers/cpu/tensor/isinf.cc @@ -5,10 +5,10 @@ #include "core/common/common.h" #include "core/framework/data_types_internal.h" +#include "core/framework/math.h" #include "core/framework/op_kernel.h" #include "core/framework/tensor.h" #include "core/providers/op_kernel_type_control.h" -#include "core/util/math_cpuonly.h" namespace onnxruntime { // https://github.com/onnx/onnx/blob/master/docs/Operators.md#IsInf diff --git a/onnxruntime/core/providers/cpu/tensor/isnan.cc b/onnxruntime/core/providers/cpu/tensor/isnan.cc index a4ac251e54fe3..9fc784ed30823 100644 --- a/onnxruntime/core/providers/cpu/tensor/isnan.cc +++ b/onnxruntime/core/providers/cpu/tensor/isnan.cc @@ -1,9 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "isnan.h" -#include "core/util/math_cpuonly.h" +#include "core/providers/cpu/tensor/isnan.h" + #include "core/common/common.h" +#include "core/framework/math.h" #include "core/framework/tensor.h" #include "Eigen/src/Core/arch/Default/Half.h" diff --git a/onnxruntime/core/profile/context.h b/onnxruntime/core/providers/cuda/nvtx_profile_context.h similarity index 100% rename from onnxruntime/core/profile/context.h rename to onnxruntime/core/providers/cuda/nvtx_profile_context.h diff --git a/onnxruntime/core/util/distance.h b/onnxruntime/core/util/distance.h index 02d6147df5024..1a40d2142cb81 100644 --- a/onnxruntime/core/util/distance.h +++ b/onnxruntime/core/util/distance.h @@ -3,7 +3,7 @@ #pragma once #include -#include "math_cpuonly.h" +#include "core/util/math_cpuonly.h" namespace onnxruntime { diff --git a/onnxruntime/core/util/math.h b/onnxruntime/core/util/math.h index 393340ffc0938..97d2e7bd4bb4d 100644 --- a/onnxruntime/core/util/math.h +++ b/onnxruntime/core/util/math.h @@ -16,9 +16,10 @@ #pragma once +#include + #ifndef SHARED_PROVIDER #include "core/common/common.h" -#include "core/framework/tensor.h" #endif #ifndef CBLAS_ENUM_DEFINED_H @@ -89,8 +90,7 @@ void RowwiseSum(int N, int D, const T* x, T* y, // Sum of vector x, and writes the result to a single value y. template -void Sum(int N, const T* x, T* y, Provider* provider, - Tensor* scratch_ptr = nullptr); +void Sum(int N, const T* x, T* y, Provider* provider); template void Scale(int N, float alpha, const T* x, T* y, Provider* provider); diff --git a/onnxruntime/core/util/math_cpu.cc b/onnxruntime/core/util/math_cpu.cc index c09d885a23d81..164e88573c4cb 100644 --- a/onnxruntime/core/util/math_cpu.cc +++ b/onnxruntime/core/util/math_cpu.cc @@ -15,9 +15,11 @@ */ // Modifications Copyright (c) Microsoft. -#include -#include "core/util/math.h" #include "core/util/math_cpuonly.h" +#include "core/util/math.h" + +#include +#include #include "core/mlas/inc/mlas.h" #if defined(__GNUC__) #pragma GCC diagnostic push @@ -859,10 +861,10 @@ SPECIALIZED_ROWWISESUM(int64_t) SPECIALIZED_ROWWISESUM(double) #undef SPECIALIZED_ROWWISESUM -#define SPECIALIZED_SUM(T) \ - template <> \ - void Sum(int N, const T* x, T* y, CPUMathUtil* /* unused */, Tensor* /* unused */) { \ - *y = ConstEigenVectorMap(x, N).sum(); \ +#define SPECIALIZED_SUM(T) \ + template <> \ + void Sum(int N, const T* x, T* y, CPUMathUtil* /* unused */) { \ + *y = ConstEigenVectorMap(x, N).sum(); \ } SPECIALIZED_SUM(float); diff --git a/onnxruntime/core/util/math_cpuonly.h b/onnxruntime/core/util/math_cpuonly.h index d9214b16c0b12..7e70bfc99be7d 100644 --- a/onnxruntime/core/util/math_cpuonly.h +++ b/onnxruntime/core/util/math_cpuonly.h @@ -62,9 +62,6 @@ #pragma warning(pop) #endif -#ifndef SHARED_PROVIDER -#include "core/framework/tensor.h" -#endif namespace onnxruntime { // common Eigen types that we will often use @@ -109,15 +106,6 @@ template using ConstEigenMatrixMapRowMajorOuterStride = Eigen::Map, 0, Eigen::OuterStride<>>; -template -auto EigenMap(Tensor& t) -> EigenVectorMap { - return EigenVectorMap(t.template MutableData(), gsl::narrow(t.Shape().Size())); -} -template -auto EigenMap(const Tensor& t) -> ConstEigenVectorMap { - return ConstEigenVectorMap(t.template Data(), gsl::narrow(t.Shape().Size())); -} - class CPUMathUtil { public: /*CPUMathUtil contains some help method like generate a diff --git a/onnxruntime/core/util/thread_utils.cc b/onnxruntime/core/util/thread_utils.cc index f47791baac4b3..94218ee7c9e0f 100644 --- a/onnxruntime/core/util/thread_utils.cc +++ b/onnxruntime/core/util/thread_utils.cc @@ -1,4 +1,8 @@ -#include "thread_utils.h" +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/util/thread_utils.h" + #include #ifdef _WIN32 diff --git a/onnxruntime/test/common/tensor_op_test_utils.h b/onnxruntime/test/common/tensor_op_test_utils.h index 7a7c9b512b3c0..85371f21dc10b 100644 --- a/onnxruntime/test/common/tensor_op_test_utils.h +++ b/onnxruntime/test/common/tensor_op_test_utils.h @@ -6,11 +6,14 @@ #include #include +#include + #include "gtest/gtest.h" #include "core/common/common.h" #include "core/common/optional.h" #include "core/common/type_utils.h" +#include "core/framework/tensor.h" #include "core/util/math.h" namespace onnxruntime { diff --git a/orttraining/orttraining/core/graph/gradient_builder_base.h b/orttraining/orttraining/core/graph/gradient_builder_base.h index bd615dc0484d1..b2156660b8c1e 100644 --- a/orttraining/orttraining/core/graph/gradient_builder_base.h +++ b/orttraining/orttraining/core/graph/gradient_builder_base.h @@ -5,8 +5,9 @@ #include #include -#include "core/util/math.h" +#include "core/framework/float16.h" #include "core/graph/graph.h" +#include "core/util/math.h" #include "orttraining/core/graph/graph_augmenter.h" #include "orttraining/core/graph/gradient_config.h" #include "orttraining/core/graph/recompute_graph_utils.h" diff --git a/orttraining/orttraining/core/session/training_session.cc b/orttraining/orttraining/core/session/training_session.cc index a5b63d412165b..14c1afccc7aa8 100644 --- a/orttraining/orttraining/core/session/training_session.cc +++ b/orttraining/orttraining/core/session/training_session.cc @@ -36,7 +36,7 @@ #ifdef ENABLE_NVTX_PROFILE #include #include -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile_context.h" #endif namespace onnxruntime { diff --git a/orttraining/orttraining/models/runner/training_runner.cc b/orttraining/orttraining/models/runner/training_runner.cc index d4789d825729f..664b489c62348 100644 --- a/orttraining/orttraining/models/runner/training_runner.cc +++ b/orttraining/orttraining/models/runner/training_runner.cc @@ -13,7 +13,7 @@ #include "core/platform/env.h" #include "core/platform/path_lib.h" #ifdef ENABLE_NVTX_PROFILE -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile_context.h" #endif #include "core/session/environment.h" #include "orttraining/core/framework/checkpointing.h" diff --git a/orttraining/orttraining/training_ops/cpu/math/scale.cc b/orttraining/orttraining/training_ops/cpu/math/scale.cc index 42e3efe894b67..552fbc67540c3 100644 --- a/orttraining/orttraining/training_ops/cpu/math/scale.cc +++ b/orttraining/orttraining/training_ops/cpu/math/scale.cc @@ -2,8 +2,8 @@ // Licensed under the MIT License. #include "orttraining/training_ops/cpu/math/scale.h" +#include "core/framework/math.h" #include "core/providers/common.h" -#include "core/util/math_cpuonly.h" namespace onnxruntime { namespace contrib { diff --git a/orttraining/orttraining/training_ops/cpu/nn/dropout_7.cc b/orttraining/orttraining/training_ops/cpu/nn/dropout_7.cc index ed126eb2607dd..4aea989c9f003 100644 --- a/orttraining/orttraining/training_ops/cpu/nn/dropout_7.cc +++ b/orttraining/orttraining/training_ops/cpu/nn/dropout_7.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. #include "orttraining/training_ops/cpu/nn/dropout_7.h" -#include "core/util/math_cpuonly.h" +#include "core/framework/math.h" namespace onnxruntime { diff --git a/orttraining/orttraining/training_ops/cpu/op_gradients.cc b/orttraining/orttraining/training_ops/cpu/op_gradients.cc index a5ef415374907..e25acd6ea0e7d 100644 --- a/orttraining/orttraining/training_ops/cpu/op_gradients.cc +++ b/orttraining/orttraining/training_ops/cpu/op_gradients.cc @@ -143,7 +143,7 @@ Status SoftmaxGrad::Compute(OpKernelContext* context) const { math::Exp(nd, Ydata, eYdata, nullptr); for (size_t i = 0; i < N; ++i) { float sdY; - math::Sum(d, dYdata + i * d, &sdY, nullptr, nullptr); + math::Sum(d, dYdata + i * d, &sdY, nullptr); math::Axpy(d, -sdY, eYdata + i * d, dXdata + i * d, nullptr); } } else { diff --git a/orttraining/orttraining/training_ops/cuda/communication/nccl_service.cc b/orttraining/orttraining/training_ops/cuda/communication/nccl_service.cc index 2628e47c5ec3a..fb9c0e49a3b6a 100644 --- a/orttraining/orttraining/training_ops/cuda/communication/nccl_service.cc +++ b/orttraining/orttraining/training_ops/cuda/communication/nccl_service.cc @@ -5,7 +5,7 @@ #include "orttraining/training_ops/cuda/communication/nccl_service.h" #include "core/common/common.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile_context.h" #include "core/providers/cuda/cuda_check_memory.h" #include "core/providers/cuda/cuda_common.h" #include "orttraining/core/framework/communication/mpi/mpi_context.h" diff --git a/orttraining/orttraining/training_ops/cuda/communication/recv.cc b/orttraining/orttraining/training_ops/cuda/communication/recv.cc index 4f230e23154f8..60ab05d9d2e83 100644 --- a/orttraining/orttraining/training_ops/cuda/communication/recv.cc +++ b/orttraining/orttraining/training_ops/cuda/communication/recv.cc @@ -7,7 +7,7 @@ #include "orttraining/training_ops/communication_common.h" #include "orttraining/training_ops/cuda/communication/nccl_service.h" #include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile_context.h" #include "core/providers/cuda/cuda_check_memory.h" #include "core/providers/cuda/cuda_common.h" #include diff --git a/orttraining/orttraining/training_ops/cuda/communication/send.cc b/orttraining/orttraining/training_ops/cuda/communication/send.cc index 832472013fd29..d36ef09419905 100644 --- a/orttraining/orttraining/training_ops/cuda/communication/send.cc +++ b/orttraining/orttraining/training_ops/cuda/communication/send.cc @@ -6,8 +6,8 @@ #include "orttraining/training_ops/cuda/communication/send.h" #include "orttraining/training_ops/communication_common.h" #include "orttraining/training_ops/cuda/communication/nccl_service.h" -#include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile.h" +#include "core/providers/cuda/nvtx_profile_context.h" #include "core/providers/cuda/cuda_check_memory.h" #include "core/providers/cuda/cuda_common.h" #include diff --git a/orttraining/orttraining/training_ops/cuda/controlflow/record.cc b/orttraining/orttraining/training_ops/cuda/controlflow/record.cc index b491128111a45..f29dcd13f50cb 100644 --- a/orttraining/orttraining/training_ops/cuda/controlflow/record.cc +++ b/orttraining/orttraining/training_ops/cuda/controlflow/record.cc @@ -6,8 +6,8 @@ // Include event mechanism shared by CPU and GPU implementations. #include "orttraining/training_ops/cpu/controlflow/event_pool.h" #include "orttraining/training_ops/cpu/controlflow/record.h" -#include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile.h" +#include "core/providers/cuda/nvtx_profile_context.h" namespace onnxruntime { namespace cuda { diff --git a/orttraining/orttraining/training_ops/cuda/controlflow/wait.cc b/orttraining/orttraining/training_ops/cuda/controlflow/wait.cc index 75f44ffba7e85..1e466dd8d111a 100644 --- a/orttraining/orttraining/training_ops/cuda/controlflow/wait.cc +++ b/orttraining/orttraining/training_ops/cuda/controlflow/wait.cc @@ -6,8 +6,8 @@ // Include event mechanism shared by CPU and GPU implementations. #include "orttraining/training_ops/cpu/controlflow/event_pool.h" #include "orttraining/training_ops/cpu/controlflow/wait.h" -#include "core/providers/cuda/nvtx_profile.h" -#include "core/profile/context.h" +#include "core/providers/cuda/nvtx_profile.h" +#include "core/providers/cuda/nvtx_profile_context.h" namespace onnxruntime { namespace cuda {