ORT 1.20.0 release preparation: Cherry pick round 2 (microsoft#22643)

Approved commits

---------

Co-authored-by: Hector Li <[email protected]>
Co-authored-by: ivberg <[email protected]>
3 people authored Oct 29, 2024
1 parent 2d00351 commit c4fb724
Showing 9 changed files with 138 additions and 79 deletions.
3 changes: 2 additions & 1 deletion docs/OperatorKernels.md
@@ -258,7 +258,8 @@ Do not modify directly.*
|||12|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T1** = tensor(double), tensor(float), tensor(int32), tensor(int64)|
|||[7, 11]|**T** = tensor(double), tensor(float)|
|QLinearConv|*in* x:**T1**<br> *in* x_scale:**tensor(float)**<br> *in* x_zero_point:**T1**<br> *in* w:**T2**<br> *in* w_scale:**tensor(float)**<br> *in* w_zero_point:**T2**<br> *in* y_scale:**tensor(float)**<br> *in* y_zero_point:**T3**<br> *in* B:**T4**<br> *out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(int8), tensor(uint8)<br/> **T4** = tensor(int32)|
|QLinearMatMul|*in* a:**T1**<br> *in* a_scale:**TS**<br> *in* a_zero_point:**T1**<br> *in* b:**T2**<br> *in* b_scale:**TS**<br> *in* b_zero_point:**T2**<br> *in* y_scale:**TS**<br> *in* y_zero_point:**T3**<br> *out* y:**T3**<br><br>or<br><br>*in* a:**T1**<br> *in* a_scale:**tensor(float)**<br> *in* a_zero_point:**T1**<br> *in* b:**T2**<br> *in* b_scale:**tensor(float)**<br> *in* b_zero_point:**T2**<br> *in* y_scale:**tensor(float)**<br> *in* y_zero_point:**T3**<br> *out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(int8), tensor(uint8)|
|QLinearMatMul|*in* a:**T1**<br> *in* a_scale:**TS**<br> *in* a_zero_point:**T1**<br> *in* b:**T2**<br> *in* b_scale:**TS**<br> *in* b_zero_point:**T2**<br> *in* y_scale:**TS**<br> *in* y_zero_point:**T3**<br> *out* y:**T3**<br><br>or<br><br>*in* a:**T1**<br> *in* a_scale:**tensor(float)**<br> *in* a_zero_point:**T1**<br> *in* b:**T2**<br> *in* b_scale:**tensor(float)**<br> *in* b_zero_point:**T2**<br> *in* y_scale:**tensor(float)**<br> *in* y_zero_point:**T3**<br> *out* y:**T3**|21+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(int8), tensor(uint8)<br/> **TS** = tensor(float)|
|||[10, 20]|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(int8), tensor(uint8)|
|QuantizeLinear|*in* x:**T1**<br> *in* y_scale:**T1**<br> *in* y_zero_point:**T2**<br> *out* y:**T2**<br><br>or<br><br>*in* x:**T1**<br> *in* y_scale:**tensor(float)**<br> *in* y_zero_point:**T2**<br> *out* y:**T2**|21+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int4), tensor(int8), tensor(uint16), tensor(uint4), tensor(uint8)|
|||[19, 20]|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int8), tensor(uint8)|
|||[13, 18]|**T1** = tensor(float)<br/> **T2** = tensor(int8), tensor(uint8)|
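For reference, the QuantizeLinear rows above follow the ONNX per-tensor definition y = saturate(round(x / y_scale) + y_zero_point), with round-half-to-even. A minimal sketch of that math, assuming uint8 output (the function and variable names here are illustrative, not ORT's):

    // quantize_linear_sketch.cpp — illustrative only; the real kernels live in
    // onnxruntime/core/providers/cpu/quantization/.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Per-tensor QuantizeLinear as specified by ONNX:
    // y = saturate(round(x / y_scale) + y_zero_point)
    std::vector<uint8_t> QuantizeLinear(const std::vector<float>& x,
                                        float y_scale, uint8_t y_zero_point) {
      std::vector<uint8_t> y(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        // std::nearbyint rounds half-to-even under the default FE_TONEAREST mode.
        float q = std::nearbyint(x[i] / y_scale) + static_cast<float>(y_zero_point);
        y[i] = static_cast<uint8_t>(std::clamp(q, 0.0f, 255.0f));  // saturate to uint8
      }
      return y;
    }

    int main() {
      auto y = QuantizeLinear({-1.0f, 0.0f, 0.5f, 2.0f}, /*y_scale=*/0.5f, /*y_zero_point=*/128);
      for (uint8_t v : y) std::printf("%u ", v);  // expected: 126 128 129 132
      std::printf("\n");
    }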
20 changes: 14 additions & 6 deletions onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -379,8 +379,10 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn
QuantizeLinear);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t,
QuantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QLinearMatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QLinearMatMul);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 20, uint8_t,
QLinearMatMul);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 20, int8_t,
QLinearMatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, MatMulInteger);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, MatMulInteger);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
@@ -1108,6 +1110,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, int16_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Int4x2, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, UInt4x2, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, uint8_t, QLinearMatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, int8_t, QLinearMatMul);
#if !defined(DISABLE_FLOAT8_TYPES)
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Float8E4M3FN, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Float8E4M3FNUZ, DequantizeLinear);
@@ -1691,10 +1695,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
uint8_t, QuantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12,
int8_t, QuantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
QLinearMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
QLinearMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 20,
uint8_t, QLinearMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 20,
int8_t, QLinearMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t,
MatMulInteger)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t,
@@ -2769,6 +2773,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
DequantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, UInt4x2,
DequantizeLinear)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, uint8_t,
QLinearMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, int8_t,
QLinearMatMul)>,
#if !defined(DISABLE_FLOAT8_TYPES)
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Float8E4M3FN,
DequantizeLinear)>,
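The cpu_execution_provider.cc change follows ORT's usual opset-versioning pattern: when opset 21 re-specifies QLinearMatMul, the formerly open-ended opset-10 registration becomes a versioned one covering [10, 20], and a new 21+ registration is added. At session initialization the registry then picks the kernel whose declared range contains the model's opset import. A toy sketch of that range-based lookup, using simplified stand-in types rather than ORT's actual registry:

    // Toy sketch of versioned kernel lookup — not ORT's real KernelRegistry.
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct KernelEntry {
      int since_version;  // first opset version the kernel supports
      int end_version;    // last supported version (large sentinel for open-ended "21+")
      std::string name;   // which implementation would run
    };

    class ToyRegistry {
     public:
      void Register(const std::string& op, int since, int end, std::string impl) {
        kernels_[op].push_back({since, end, std::move(impl)});
      }
      // Pick the kernel whose [since, end] range contains the model's opset.
      const KernelEntry* Lookup(const std::string& op, int opset) const {
        auto it = kernels_.find(op);
        if (it == kernels_.end()) return nullptr;
        for (const auto& k : it->second)
          if (opset >= k.since_version && opset <= k.end_version) return &k;
        return nullptr;
      }
     private:
      std::map<std::string, std::vector<KernelEntry>> kernels_;
    };

    int main() {
      ToyRegistry r;
      // Mirrors the change above: a [10, 20] versioned kernel plus a new 21+ kernel.
      r.Register("QLinearMatMul", 10, 20, "qlinearmatmul_opset10");
      r.Register("QLinearMatMul", 21, 999, "qlinearmatmul_opset21");
      std::cout << r.Lookup("QLinearMatMul", 13)->name << "\n";  // qlinearmatmul_opset10
      std::cout << r.Lookup("QLinearMatMul", 21)->name << "\n";  // qlinearmatmul_opset21
    }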
onnxruntime/core/providers/cpu/quantization/qlinear_matmul.cc
@@ -14,10 +14,11 @@

namespace onnxruntime {
// uint8_t kernel supports weight being either uint8_t or int8_t
ONNX_OPERATOR_TYPED_KERNEL_EX(
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
QLinearMatMul,
kOnnxDomain,
10,
20,
uint8_t,
kCpuExecutionProvider,
KernelDefBuilder()
@@ -26,21 +27,45 @@ ONNX_OPERATOR_TYPED_KERNEL_EX(
.TypeConstraint("T3", DataTypeImpl::GetTensorType<uint8_t>()),
QLinearMatMul);

ONNX_OPERATOR_TYPED_KERNEL_EX(
QLinearMatMul,
kOnnxDomain,
21,
uint8_t,
kCpuExecutionProvider,
KernelDefBuilder()
.TypeConstraint("TS", DataTypeImpl::GetTensorType<float>())
.TypeConstraint("T1", DataTypeImpl::GetTensorType<uint8_t>())
.TypeConstraint("T2", {DataTypeImpl::GetTensorType<uint8_t>(), DataTypeImpl::GetTensorType<int8_t>()})
.TypeConstraint("T3", DataTypeImpl::GetTensorType<uint8_t>()),
QLinearMatMul);

// int8_t kernel only supports weight being int8_t
#define REGISTER_QLINEARMATMUL_INT8_KERNEL() \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
QLinearMatMul, \
kOnnxDomain, \
10, \
int8_t, \
kCpuExecutionProvider, \
KernelDefBuilder() \
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int8_t>()) \
.TypeConstraint("T2", DataTypeImpl::GetTensorType<int8_t>()) \
.TypeConstraint("T3", DataTypeImpl::GetTensorType<int8_t>()), \
QLinearMatMul);

REGISTER_QLINEARMATMUL_INT8_KERNEL();
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(
QLinearMatMul,
kOnnxDomain,
10,
20,
int8_t,
kCpuExecutionProvider,
KernelDefBuilder()
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int8_t>())
.TypeConstraint("T2", DataTypeImpl::GetTensorType<int8_t>())
.TypeConstraint("T3", DataTypeImpl::GetTensorType<int8_t>()),
QLinearMatMul);

ONNX_OPERATOR_TYPED_KERNEL_EX(
QLinearMatMul,
kOnnxDomain,
21,
int8_t,
kCpuExecutionProvider,
KernelDefBuilder()
.TypeConstraint("TS", DataTypeImpl::GetTensorType<float>())
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int8_t>())
.TypeConstraint("T2", DataTypeImpl::GetTensorType<int8_t>())
.TypeConstraint("T3", DataTypeImpl::GetTensorType<int8_t>()),
QLinearMatMul);

Status QLinearMatMul::Compute(OpKernelContext* ctx) const {
const auto* a = ctx->Input<Tensor>(IN_A);
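The registrations above change only type constraints and version ranges — in particular, the opset-21 kernels add an explicit TS = tensor(float) constraint for the scale inputs — while what QLinearMatMul computes is unchanged between opsets 10 and 21. A scalar reference of the math, assuming uint8 activations and int8 weights with per-tensor quantization (the real kernel dispatches to MLAS GEMM routines; names here are illustrative):

    // Scalar reference for QLinearMatMul — illustrative only.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Y[M,N] = quantized A[M,K] x B[K,N], all scales/zero points per-tensor.
    void QLinearMatMulRef(const std::vector<uint8_t>& A, float a_scale, uint8_t a_zp,
                          const std::vector<int8_t>& B, float b_scale, int8_t b_zp,
                          std::vector<uint8_t>& Y, float y_scale, uint8_t y_zp,
                          int M, int K, int N) {
      for (int m = 0; m < M; ++m)
        for (int n = 0; n < N; ++n) {
          int32_t acc = 0;  // accumulate the zero-point-adjusted products in int32
          for (int k = 0; k < K; ++k)
            acc += (int32_t(A[m * K + k]) - a_zp) * (int32_t(B[k * N + n]) - b_zp);
          float scaled = acc * (a_scale * b_scale / y_scale);
          float q = std::nearbyint(scaled) + y_zp;  // round-half-to-even, then shift
          Y[m * N + n] = uint8_t(std::clamp(q, 0.0f, 255.0f));
        }
    }

    int main() {
      std::vector<uint8_t> A = {130, 126};  // 1x2, zp=128, scale=0.5 -> {1.0, -1.0}
      std::vector<int8_t> B = {2, -2};      // 2x1, zp=0,  scale=1.0
      std::vector<uint8_t> Y(1);
      QLinearMatMulRef(A, 0.5f, 128, B, 1.0f, 0, Y, 0.25f, 128, 1, 2, 1);
      std::printf("%u\n", Y[0]);  // (1*2 + -1*-2)/0.25 + 128 = 144
    }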
16 changes: 13 additions & 3 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -302,13 +302,21 @@ QnnLog_Level_t QnnBackendManager::MapOrtSeverityToQNNLogLevel(logging::Severity
}

Status QnnBackendManager::ResetQnnLogLevel() {
auto ort_log_level = logger_->GetSeverity();
LOGS(*logger_, INFO) << "Reset Qnn log level to ORT Logger level: " << (unsigned int)ort_log_level;
return UpdateQnnLogLevel(ort_log_level);
std::lock_guard<std::mutex> lock(logger_mutex_);

if (backend_setup_completed_ && logger_ != nullptr) {
auto ort_log_level = logger_->GetSeverity();
LOGS(*logger_, INFO) << "Reset Qnn log level to ORT Logger level: " << (unsigned int)ort_log_level;
return UpdateQnnLogLevel(ort_log_level);
}
return Status::OK();
}

Status QnnBackendManager::UpdateQnnLogLevel(logging::Severity ort_log_level) {
ORT_RETURN_IF(nullptr == log_handle_, "Unable to update QNN Log Level. Invalid QNN log handle.");
ORT_RETURN_IF(false == backend_setup_completed_, "Unable to update QNN Log Level. Backend setup not completed.");
ORT_RETURN_IF(nullptr == logger_, "Unable to update QNN Log Level. Invalid logger.");

QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(ort_log_level);

LOGS(*logger_, INFO) << "Updating Qnn log level to: " << qnn_log_level;
@@ -686,6 +694,7 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t
}

Status QnnBackendManager::SetupBackend(const logging::Logger& logger, bool load_from_cached_context) {
std::lock_guard<std::mutex> lock(logger_mutex_);
if (backend_setup_completed_) {
LOGS(logger, VERBOSE) << "Backend setup already!";
return Status::OK();
@@ -972,6 +981,7 @@ void QnnBackendManager::ReleaseResources() {
ORT_THROW("Failed to ShutdownBackend.");
}

std::lock_guard<std::mutex> lock(logger_mutex_);
result = TerminateQnnLog();
if (Status::OK() != result) {
ORT_THROW("Failed to TerminateQnnLog.");
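The qnn_backend_manager.cc change serializes logger access: ResetQnnLogLevel now takes logger_mutex_ and re-checks backend_setup_completed_ and logger_ under the lock, because the ETW callback can fire on another thread while the backend is being set up or torn down. A condensed sketch of the pattern with stand-in types (not the actual QnnBackendManager API):

    // Minimal sketch of the locking pattern added above.
    #include <iostream>
    #include <mutex>
    #include <thread>

    class BackendManager {
     public:
      void Setup() {
        std::lock_guard<std::mutex> lock(logger_mutex_);
        logger_ = &std::cout;  // stand-in for the ORT logger pointer
        setup_completed_ = true;
      }
      void Teardown() {
        std::lock_guard<std::mutex> lock(logger_mutex_);
        logger_ = nullptr;  // callbacks must not observe a stale pointer
        setup_completed_ = false;
      }
      // Safe to call from a callback thread at any time.
      void ResetLogLevel() {
        std::lock_guard<std::mutex> lock(logger_mutex_);
        if (setup_completed_ && logger_ != nullptr) {
          *logger_ << "resetting log level\n";
        }  // otherwise: no-op instead of dereferencing a dead logger
      }
     private:
      std::mutex logger_mutex_;
      std::ostream* logger_ = nullptr;
      bool setup_completed_ = false;
    };

    int main() {
      BackendManager m;
      std::thread t([&] { m.ResetLogLevel(); });  // may run before or after Setup
      m.Setup();
      m.ResetLogLevel();
      m.Teardown();
      t.join();
    }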
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
@@ -12,9 +12,11 @@
#endif

#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include "HTP/QnnHtpDevice.h"
#include "QnnLog.h"
#include "QnnTypes.h"
@@ -233,6 +235,7 @@ class QnnBackendManager {

private:
const std::string backend_path_;
std::mutex logger_mutex_;
const logging::Logger* logger_ = nullptr;
QNN_INTERFACE_VER_TYPE qnn_interface_ = QNN_INTERFACE_VER_TYPE_INIT;
QNN_SYSTEM_INTERFACE_VER_TYPE qnn_sys_interface_ = QNN_SYSTEM_INTERFACE_VER_TYPE_INIT;
90 changes: 46 additions & 44 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -258,49 +258,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
}
}

#ifdef _WIN32
auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance();
// Register callback for ETW capture state (rundown)
callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback(
[&etwRegistrationManager, this](
LPCGUID SourceId,
ULONG IsEnabled,
UCHAR Level,
ULONGLONG MatchAnyKeyword,
ULONGLONG MatchAllKeyword,
PEVENT_FILTER_DESCRIPTOR FilterData,
PVOID CallbackContext) {
ORT_UNUSED_PARAMETER(SourceId);
ORT_UNUSED_PARAMETER(MatchAnyKeyword);
ORT_UNUSED_PARAMETER(MatchAllKeyword);
ORT_UNUSED_PARAMETER(FilterData);
ORT_UNUSED_PARAMETER(CallbackContext);

if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) {
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity();
(void)qnn_backend_manager_->UpdateQnnLogLevel(ortETWSeverity);
}
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) {
if (Level != 0) {
// Commenting out Dynamic QNN Profiling for now
// There seems to be a crash in 3rd party QC QnnHtp.dll with this.
// Repro Scenario - start ETW tracing prior to session creation.
// Then disable/enable ETW Tracing with the code below uncommented a few times
// auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level);
// (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw);
}
}
}

if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) {
// (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID);
(void)qnn_backend_manager_->ResetQnnLogLevel();
}
});
etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_);
#endif

// In case ETW gets disabled later
auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL);
if (profiling_level_pos != provider_options_map.end()) {
@@ -440,6 +397,49 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
htp_arch,
soc_model,
enable_htp_weight_sharing_);

#ifdef _WIN32
auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance();
// Register callback for ETW capture state (rundown)
callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback(
[&etwRegistrationManager, this](
LPCGUID SourceId,
ULONG IsEnabled,
UCHAR Level,
ULONGLONG MatchAnyKeyword,
ULONGLONG MatchAllKeyword,
PEVENT_FILTER_DESCRIPTOR FilterData,
PVOID CallbackContext) {
ORT_UNUSED_PARAMETER(SourceId);
ORT_UNUSED_PARAMETER(MatchAnyKeyword);
ORT_UNUSED_PARAMETER(MatchAllKeyword);
ORT_UNUSED_PARAMETER(FilterData);
ORT_UNUSED_PARAMETER(CallbackContext);

if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) {
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity();
(void)qnn_backend_manager_->UpdateQnnLogLevel(ortETWSeverity);
}
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) {
if (Level != 0) {
// Commenting out Dynamic QNN Profiling for now
// There seems to be a crash in 3rd party QC QnnHtp.dll with this.
// Repro Scenario - start ETW tracing prior to session creation.
// Then disable/enable ETW Tracing with the code below uncommented a few times
// auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level);
// (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw);
}
}
}

if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) {
// (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID);
(void)qnn_backend_manager_->ResetQnnLogLevel();
}
});
etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_);
#endif
}

QNNExecutionProvider::~QNNExecutionProvider() {
@@ -453,7 +453,9 @@ QNNExecutionProvider::~QNNExecutionProvider() {

// Unregister the ETW callback
#ifdef _WIN32
logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_);
if (callback_ETWSink_provider_ != nullptr) {
logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_);
}
#endif
}

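Two ordering fixes in qnn_execution_provider.cc: the ETW callback is now registered only after qnn_backend_manager_ is constructed, so a callback firing immediately cannot touch a half-built provider, and callback_ETWSink_provider_ now defaults to nullptr so the destructor only unregisters a callback that was actually registered. A sketch of this register-last / unregister-if-set pattern, with simplified stand-ins for EtwRegistrationManager:

    // Sketch of the ordering fix — stand-in types, not the real ETW manager.
    #include <functional>
    #include <iostream>
    #include <memory>

    using Callback = std::function<void(int level)>;

    struct CallbackRegistry {  // stand-in for EtwRegistrationManager
      Callback registered;
      void Register(Callback cb) { registered = std::move(cb); }
      void Unregister() { registered = nullptr; }
      void Fire(int level) { if (registered) registered(level); }
    };

    CallbackRegistry g_registry;

    class Provider {
     public:
      Provider() {
        backend_ = std::make_unique<int>(42);  // stand-in for qnn_backend_manager_
        // Register only after every member the callback touches exists.
        callback_ = [this](int level) {
          std::cout << "level=" << level << " backend=" << *backend_ << "\n";
        };
        g_registry.Register(callback_);
        registered_ = true;
      }
      ~Provider() {
        if (registered_) g_registry.Unregister();  // guard: may never have registered
      }
     private:
      std::unique_ptr<int> backend_;
      Callback callback_;  // default-constructed, i.e. "nullptr"
      bool registered_ = false;
    };

    int main() {
      { Provider p; g_registry.Fire(3); }  // safe: backend_ already built
      g_registry.Fire(3);                  // no-op after unregistration
    }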
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/qnn/qnn_execution_provider.h
@@ -151,7 +151,7 @@ class QNNExecutionProvider : public IExecutionProvider {
bool enable_HTP_FP16_precision_ = true;
bool share_ep_contexts_ = false;
#ifdef _WIN32
onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_;
onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_ = nullptr;
#endif
qnn::ModelSettings model_settings_ = {};

8 changes: 7 additions & 1 deletion onnxruntime/test/onnx/TestCase.cc
@@ -1026,7 +1026,13 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
{"dequantizelinear_int4", "Bug with model input name 'zero_point' not matching node's input name", {}},
{"dequantizelinear_uint4", "Bug with model input name 'zero_point' not matching node's input name", {}},
{"quantizelinear_int4", "Bug with model input name 'zero_point' not matching node's input name", {}},
{"quantizelinear_uint4", "Bug with model input name 'zero_point' not matching node's input name", {}}});
{"quantizelinear_uint4", "Bug with model input name 'zero_point' not matching node's input name", {}},
{"qlinearmatmul_2D_int8_float16", "fp16 type ont supported by CPU EP", {}},
{"qlinearmatmul_2D_int8_float32", "result diff", {}},
{"qlinearmatmul_2D_uint8_float16", "fp16 type ont supported by CPU EP", {}},
{"qlinearmatmul_3D_int8_float16", "fp16 type ont supported by CPU EP", {}},
{"qlinearmatmul_3D_int8_float32", "result diff", {}},
{"qlinearmatmul_3D_uint8_float16", "fp16 type ont supported by CPU EP", {}}});

// Some EPs may fail to pass some specific testcases.
// For example TensorRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16.
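The new entries cover the opset-21 QLinearMatMul ONNX test models: the float16-scale variants cannot run on the CPU EP, and two float32 variants differ from the expected output only by rounding. A toy sketch of how such an exclusion set keyed on test name might work (the real BrokenTest entries also carry a trailing set of affected versions, simplified away here):

    // Toy version of a broken-test exclusion list keyed by test name.
    #include <iostream>
    #include <set>
    #include <string>

    struct BrokenTest {
      std::string name;
      std::string reason;
      bool operator<(const BrokenTest& o) const { return name < o.name; }  // key on name only
    };

    int main() {
      std::set<BrokenTest> broken = {
          {"qlinearmatmul_2D_int8_float16", "fp16 type not supported by CPU EP"},
          {"qlinearmatmul_2D_int8_float32", "result diff"},
      };
      for (const std::string test : {"qlinearmatmul_2D_int8_float16", "matmul_basic"}) {
        auto it = broken.find(BrokenTest{test, ""});
        if (it != broken.end())
          std::cout << "skip " << test << ": " << it->reason << "\n";
        else
          std::cout << "run  " << test << "\n";
      }
    }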