diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index 2304a4f8a63c6..8bfe1945c9293 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -61,10 +61,10 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS) ) endif() set(onnxruntime_cpu_neural_speed_srcs - "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_wrapper.h" - "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_defs.h" - "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.cc" - "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.h" + "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_wrapper.h" + "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_defs.h" + "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_gemm.cc" + "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_gemm.h" ) if(NOT USE_NEURAL_SPEED) list(REMOVE_ITEM onnxruntime_cpu_contrib_ops_srcs ${onnxruntime_cpu_neural_speed_srcs}) diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h similarity index 95% rename from onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h index 9a92ba2994997..3ffb004357843 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h +++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h @@ -8,7 +8,7 @@ Licensed under the MIT License. #pragma once -#include "contrib_ops/cpu/quantization/bestla_wrapper.h" +#include "contrib_ops/cpu/quantization/neural_speed_wrapper.h" namespace bestla { diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc similarity index 99% rename from onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc index 20d581c7c3fa6..7efcdf6cb43e6 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc @@ -6,15 +6,15 @@ Licensed under the MIT License. Module Name: - bestla_gemm.cpp + neural_speed_gemm.cpp Abstract: - Currently only support Q4 gemm. + GEMM template combinations of neural_speed. --*/ -#include "contrib_ops/cpu/quantization/bestla_defs.h" -#include "contrib_ops/cpu/quantization/bestla_gemm.h" +#include "contrib_ops/cpu/quantization/neural_speed_defs.h" +#include "contrib_ops/cpu/quantization/neural_speed_gemm.h" #include "core/platform/threadpool.h" using ThreadPool = onnxruntime::concurrency::ThreadPool; diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h similarity index 98% rename from onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h index 27baacaa2ce7d..0ffece2be77f2 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h +++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h @@ -6,11 +6,11 @@ Licensed under the MIT License. Module Name: - bestla_gemm.h + neural_speed_gemm.h Abstract: - Currently only support Q4 gemm. + Prepack-weight GEMM APIs of neural_speed. --*/ #pragma once diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_wrapper.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_wrapper.h similarity index 100% rename from onnxruntime/contrib_ops/cpu/quantization/bestla_wrapper.h rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_wrapper.h