Skip to content

Commit

Permalink
move neural_speed gemms to contrib_ops
Browse files Browse the repository at this point in the history
  • Loading branch information
luoyu-intel committed Jan 9, 2024
1 parent 3b1155e commit 009adb6
Show file tree
Hide file tree
Showing 14 changed files with 624 additions and 955 deletions.
4 changes: 2 additions & 2 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1177,8 +1177,8 @@ if (onnxruntime_USE_DNNL)
add_compile_definitions(DNNL_OPENMP)
endif()


if (onnxruntime_USE_NEURAL_SPEED AND NOT onnxruntime_MINIMAL_BUILD)
set(USE_NEURAL_SPEED FALSE)
if (onnxruntime_USE_NEURAL_SPEED)
include(neural_speed)
endif()

Expand Down
15 changes: 7 additions & 8 deletions cmake/external/neural_speed.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
set(BTLA_URL https://github.com/intel/neural-speed.git)
set(BTLA_TAG 368ccbd2823e7ecef862d09e7b2385e6b2553081) # bestla v0.1
set(NEURAL_SPEED_URL https://github.com/intel/neural-speed.git)
set(NEURAL_SPEED_TAG 18720b319d6921c28e59cc9e003e50cee9a85fcc) # kernel-only release v0.2

set(USE_NEURAL_SPEED FALSE)
if (onnxruntime_USE_NEURAL_SPEED)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND onnxruntime_target_platform STREQUAL "x86_64")
set(USE_NEURAL_SPEED TRUE)
Expand All @@ -10,11 +9,11 @@ if (onnxruntime_USE_NEURAL_SPEED)
endif()
if(USE_NEURAL_SPEED)
FetchContent_Declare(
bestla
GIT_REPOSITORY ${BTLA_URL}
GIT_TAG ${BTLA_TAG}
neural_speed
GIT_REPOSITORY ${NEURAL_SPEED_URL}
GIT_TAG ${NEURAL_SPEED_TAG}
)
FetchContent_MakeAvailable(bestla)
add_compile_definitions(MLAS_NEURAL_SPEED)
FetchContent_MakeAvailable(neural_speed)
add_compile_definitions(ORT_NEURAL_SPEED)
endif()
endif()
12 changes: 0 additions & 12 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,6 @@ endif()

set(ONNXRUNTIME_MLAS_LIBS onnxruntime_mlas)

# Wires the bestla GEMM implementation into the onnxruntime_mlas target.
#
# Takes no arguments. Links onnxruntime_mlas privately against bestla::bestla,
# adds ${MLAS_SRC_DIR}/bestla_gemm.cpp to its sources, and turns off
# warnings-as-errors for that target.
function(add_neural_speed)
  target_link_libraries(onnxruntime_mlas PRIVATE bestla::bestla)
  target_sources(onnxruntime_mlas PRIVATE
    ${MLAS_SRC_DIR}/bestla_gemm.cpp
  )
  # Name the target explicitly. `target_name` is neither a parameter of this
  # function nor set inside it, so `${target_name}` depended on whatever the
  # caller's scope happened to define; when unset, the property was applied
  # to an empty target list and had no effect.
  set_target_properties(onnxruntime_mlas PROPERTIES COMPILE_WARNING_AS_ERROR OFF)
endfunction()

#TODO: set MASM flags properly
function(setup_mlas_source_for_windows)

Expand Down Expand Up @@ -611,10 +603,6 @@ else()
target_sources(onnxruntime_mlas PRIVATE ${mlas_platform_srcs})
endif()

# When USE_NEURAL_SPEED is true, call add_neural_speed() so that the bestla
# library and bestla_gemm.cpp are attached to onnxruntime_mlas.
if(USE_NEURAL_SPEED)
add_neural_speed()
endif()

foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
target_include_directories(${mlas_target} PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})
Expand Down
12 changes: 12 additions & 0 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
"${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op_executor.cc"
)
endif()
# Without neural-speed, drop the bestla files from the CPU contrib-op source
# list so they are not added to the provider sources.
if(NOT USE_NEURAL_SPEED)
  foreach(neural_speed_only_src IN ITEMS
      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_defs.h"
      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.cc"
      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.h")
    list(REMOVE_ITEM onnxruntime_cpu_contrib_ops_srcs "${neural_speed_only_src}")
  endforeach()
endif()
# add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cpu_contrib_ops_srcs})
list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
Expand Down Expand Up @@ -144,6 +151,11 @@ if (HAS_BITWISE_INSTEAD_OF_LOGICAL)
target_compile_options(onnxruntime_providers PRIVATE "-Wno-bitwise-instead-of-logical")
endif()

# With neural-speed enabled, onnxruntime_providers links bestla privately and
# stops treating compiler warnings as errors.
if(USE_NEURAL_SPEED)
  # ignore warnings inside neural-speed
  set_property(TARGET onnxruntime_providers PROPERTY COMPILE_WARNING_AS_ERROR OFF)
  target_link_libraries(onnxruntime_providers PRIVATE bestla::bestla)
endif()

if (MSVC)
target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
# if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ Licensed under the MIT License.
#include "bestla/bestla_prologue_a.h"
#include "bestla/bestla_wrapper.h"

namespace bestla
{
namespace bestla {

// Short names for two GEMM core types from the gemm namespace, one per
// instruction set named in the alias.
// NOTE(review): the numeric template arguments look like tile sizes —
// confirm against the bestla gemm headers.
using tAVX512F = gemm::SCoreRowNAvx512f<48, 8>;
using tAMX_BF16 = gemm::HCoreRowNAmxbf16<64, 16>;
Expand All @@ -33,14 +32,13 @@ using tWeiNInt = prologue_b::gemm::WeightKBlockNInteger<GC_T, ISA_T>;
// Alias template for prologue_b::gemm::WeightKBlockNFloat, parameterized on
// a type GC_T and a BTLA_ISA value ISA_T.
// NOTE(review): GC_T is presumably a GEMM core type such as the aliases
// declared earlier in this header — confirm.
template <class GC_T, BTLA_ISA ISA_T>
using tWeiNFloat = prologue_b::gemm::WeightKBlockNFloat<GC_T, ISA_T>;

class ORTThreading : public parallel::IThreading
{
public:
ORTThreading(void* tp);
void parallel_for(const parallel::thread_func& func) const override;
void set_threads(int nthreads) override { assert(0); }
void sync() const override { assert(0); }
void* mTp;
// parallel::IThreading implementation that carries an opaque pointer (mTp).
// Only parallel_for() has a real implementation (defined out of line).
// set_threads() and sync() assert when called, and do nothing when
// assertions are compiled out with NDEBUG.
class ORTThreading : public parallel::IThreading {
 public:
  // Defined out of line. `tp` is presumably the thread pool stored in mTp —
  // confirm against the constructor definition.
  explicit ORTThreading(void* tp);

  // Defined out of line.
  void parallel_for(const parallel::thread_func& func) const override;

  // Not supported on this implementation. The parameter is left unnamed so
  // it does not raise an unused-parameter warning.
  void set_threads(int /*nthreads*/) override { assert(0); }

  // Not supported on this implementation.
  void sync() const override { assert(0); }

  // Opaque pointer supplied by the caller; its concrete type is not visible
  // in this header (presumably a thread pool — confirm).
  void* mTp;
};

} // namespace bestla
Loading

0 comments on commit 009adb6

Please sign in to comment.