diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake
index 2304a4f8a63c6..8bfe1945c9293 100644
--- a/cmake/onnxruntime_providers_cpu.cmake
+++ b/cmake/onnxruntime_providers_cpu.cmake
@@ -61,10 +61,10 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
     )
   endif()
   set(onnxruntime_cpu_neural_speed_srcs 
-    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_wrapper.h"
-    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_defs.h"
-    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.cc"
-    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/bestla_gemm.h"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_wrapper.h"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_defs.h"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_gemm.cc"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_gemm.h"
   )
   if(NOT USE_NEURAL_SPEED)
     list(REMOVE_ITEM onnxruntime_cpu_contrib_ops_srcs ${onnxruntime_cpu_neural_speed_srcs})
diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h
similarity index 95%
rename from onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h
rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h
index 9a92ba2994997..3ffb004357843 100644
--- a/onnxruntime/contrib_ops/cpu/quantization/bestla_defs.h
+++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_defs.h
@@ -8,7 +8,7 @@ Licensed under the MIT License.
 
 #pragma once
 
-#include "contrib_ops/cpu/quantization/bestla_wrapper.h"
+#include "contrib_ops/cpu/quantization/neural_speed_wrapper.h"
 
 namespace bestla {
 
diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc
similarity index 99%
rename from onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc
rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc
index 20d581c7c3fa6..7efcdf6cb43e6 100644
--- a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.cc
+++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc
@@ -6,15 +6,15 @@ Licensed under the MIT License.
 
 Module Name:
 
-    bestla_gemm.cpp
+    neural_speed_gemm.cc
 
 Abstract:
 
-    Currently only support Q4 gemm.
+    GEMM template combinations of neural_speed.
 --*/
 
-#include "contrib_ops/cpu/quantization/bestla_defs.h"
-#include "contrib_ops/cpu/quantization/bestla_gemm.h"
+#include "contrib_ops/cpu/quantization/neural_speed_defs.h"
+#include "contrib_ops/cpu/quantization/neural_speed_gemm.h"
 #include "core/platform/threadpool.h"
 
 using ThreadPool = onnxruntime::concurrency::ThreadPool;
diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h
similarity index 98%
rename from onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h
rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h
index 27baacaa2ce7d..0ffece2be77f2 100644
--- a/onnxruntime/contrib_ops/cpu/quantization/bestla_gemm.h
+++ b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.h
@@ -6,11 +6,11 @@ Licensed under the MIT License.
 
 Module Name:
 
-    bestla_gemm.h
+    neural_speed_gemm.h
 
 Abstract:
 
-    Currently only support Q4 gemm.
+    Prepack-weight GEMM APIs of neural_speed.
 --*/
 
 #pragma once
diff --git a/onnxruntime/contrib_ops/cpu/quantization/bestla_wrapper.h b/onnxruntime/contrib_ops/cpu/quantization/neural_speed_wrapper.h
similarity index 100%
rename from onnxruntime/contrib_ops/cpu/quantization/bestla_wrapper.h
rename to onnxruntime/contrib_ops/cpu/quantization/neural_speed_wrapper.h