diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index eeb8ebb3ccefe..48682475f2d3a 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -84,8 +84,8 @@ Do not modify directly.*
 |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
 |DFT|*in* input:**T1**<br/> *in* dft_length:**T2**<br/> *in* axis:**tensor(int64)**<br/> *out* output:**T1**<br/><br/>or<br/><br/>*in* input:**T1**<br/> *in* dft_length:**T2**<br/> *out* output:**T1**|20+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
 |||[17, 19]|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
-|DepthToSpace|*in* input:**T**<br/> *out* output:**T**|13+|**T** = tensor(double), tensor(float)|
-|||[11, 12]|**T** = tensor(double), tensor(float)|
+|DepthToSpace|*in* input:**T**<br/> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(uint8)|
+|||[11, 12]|**T** = tensor(double), tensor(float), tensor(uint8)|
 |||[1, 10]|**T** = tensor(double), tensor(float)|
 |DequantizeLinear|*in* x:**T**<br/> *in* x_scale:**tensor(float)**<br/> *in* x_zero_point:**T**<br/> *out* y:**tensor(float)**<br/><br/>or<br/><br/>*in* x:**T1**<br/> *in* x_scale:**T2**<br/> *in* x_zero_point:**T1**<br/> *out* y:**T2**|21+|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int4), tensor(int8), tensor(uint16), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
 |||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int32), tensor(int8), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
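For context on the rows above: DepthToSpace only rearranges depth (channel) data into spatial blocks, so the element type never participates in arithmetic, which is why extending the table to tensor(uint8) is purely a registration change. A minimal sketch of the shape contract the table documents (`DepthToSpaceShape` is a hypothetical helper for illustration, not an ORT function):

```cpp
// Shape contract for DepthToSpace: [N, C, H, W] -> [N, C/(b*b), H*b, W*b].
#include <array>
#include <cassert>
#include <cstdint>

std::array<int64_t, 4> DepthToSpaceShape(const std::array<int64_t, 4>& in, int64_t b) {
  assert(in[1] % (b * b) == 0);  // C must be divisible by blocksize^2
  return {in[0], in[1] / (b * b), in[2] * b, in[3] * b};
}

int main() {
  // e.g. blocksize 2: [1, 8, 2, 3] -> [1, 2, 4, 6]
  const auto out = DepthToSpaceShape({1, 8, 2, 3}, 2);
  assert((out == std::array<int64_t, 4>{1, 2, 4, 6}));
  return 0;
}
```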
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
index 2f98711771f1b..ae89af1f256d1 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
@@ -77,7 +77,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) {
                                                   true,
                                                   cpu_ep);
   qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name,
-                                                         {{"Resize", {}}},
+                                                         {{"DepthToSpace", {}},
+                                                          {"Resize", {}}},
                                                          std::move(selector_no_16bit),
                                                          std::move(drop_action_no_int16));
 
@@ -91,7 +92,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) {
                                                          std::move(drop_action_no_int16_and_positive_scale));
 
   std::unique_ptr<NodeGroupSelector> selector = std::make_unique<QDQ::DropQDQNodeGroupSelector>(true, false, true, providers);
-  // DepthToSpace and SpaceToDepth not included because there are no integer implementations.
+  // SpaceToDepth not included because there are no integer implementations.
   // https://github.com/microsoft/onnxruntime/issues/21287
   qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name,
                                                          {{"Expand", {}},
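The hunk above adds DepthToSpace to the drop-QDQ rules: because the op only moves bytes, DequantizeLinear → DepthToSpace → QuantizeLinear with matching per-tensor scale and zero point is equivalent to running the op directly on the quantized uint8 data, so the optimizer can drop the Q/DQ pair. (It goes in the `drop_action_no_int16` variant, which presumably excludes 16-bit quantized types the new kernel does not claim.) A toy demonstration of that commutation, using `std::reverse` as a stand-in permutation rather than ORT code:

```cpp
// Why dropping DQ -> op -> Q is safe for a pure data-movement op:
// per-tensor (de)quantization commutes with any permutation of elements.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <vector>

int main() {
  const float scale = 0.5f;
  const uint8_t zero_point = 128;
  std::vector<uint8_t> q = {10, 20, 30, 40};

  // Path A: dequantize, move data, requantize.
  std::vector<float> deq(q.size());
  for (size_t i = 0; i < q.size(); ++i) deq[i] = (q[i] - zero_point) * scale;
  std::reverse(deq.begin(), deq.end());  // stand-in for DepthToSpace
  std::vector<uint8_t> a(q.size());
  for (size_t i = 0; i < q.size(); ++i)
    a[i] = static_cast<uint8_t>(std::lround(deq[i] / scale) + zero_point);

  // Path B: move the quantized bytes directly (what the dropped-QDQ graph does).
  std::vector<uint8_t> b = q;
  std::reverse(b.begin(), b.end());

  assert(a == b);  // identical results, with no float round trip
  return 0;
}
```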
diff --git a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
index a23f80671c9ac..7e1049c402210 100644
--- a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
+++ b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
@@ -43,7 +43,8 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
     12,
     KernelDefBuilder()
         .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
-                              DataTypeImpl::GetTensorType<double>()}),
+                              DataTypeImpl::GetTensorType<double>(),
+                              DataTypeImpl::GetTensorType<uint8_t>()}),
     DepthToSpace);
 
 ONNX_CPU_OPERATOR_KERNEL(
@@ -51,7 +52,8 @@ ONNX_CPU_OPERATOR_KERNEL(
     13,
     KernelDefBuilder()
         .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
-                              DataTypeImpl::GetTensorType<double>()}),
+                              DataTypeImpl::GetTensorType<double>(),
+                              DataTypeImpl::GetTensorType<uint8_t>()}),
     DepthToSpace);
 
 // intermediate tensor shapes are:
@@ -196,6 +198,19 @@ Status DepthToSpace::Compute(OpKernelContext* context) const {
                            onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
                            onnxruntime::narrow<std::ptrdiff_t>(input_width),
                            onnxruntime::narrow<std::ptrdiff_t>(blocksize_));
+  } else if (input.IsDataType<uint8_t>()) {
+    SpaceDepthOpCpuImpl<uint8_t>(input, output, permutation,
+                                 onnxruntime::narrow<std::ptrdiff_t>(batch),
+                                 onnxruntime::narrow<std::ptrdiff_t>(dim1),
+                                 onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
+                                 onnxruntime::narrow<std::ptrdiff_t>(dim3),
+                                 onnxruntime::narrow<std::ptrdiff_t>(input_height),
+                                 onnxruntime::narrow<std::ptrdiff_t>(input_width),
+                                 onnxruntime::narrow<std::ptrdiff_t>(input_depth / blocksize_ / blocksize_),
+                                 onnxruntime::narrow<std::ptrdiff_t>(input_height),
+                                 onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
+                                 onnxruntime::narrow<std::ptrdiff_t>(input_width),
+                                 onnxruntime::narrow<std::ptrdiff_t>(blocksize_));
   } else {
-    // user will not see this as the kernel doesn't claim support for types other than float and double
+    // user will not see this as the kernel doesn't claim support for types other than float, double and uint8
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported input type in DepthToSpace op: ", input.DataType());
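For reference, DCR-mode DepthToSpace is exactly the 6-D reshape + transpose that `SpaceDepthOpCpuImpl` evaluates through Eigen with the dimension arguments shown above. A plain nested-loop sketch of the same index mapping for uint8 (our naming, not the kernel's):

```cpp
// Reference DCR DepthToSpace:
// y[n][c][h*b + bi][w*b + bj] = x[n][(bi*b + bj)*C' + c][h][w], with C' = C/(b*b).
#include <cstdint>
#include <vector>

std::vector<uint8_t> DepthToSpaceDCR(const std::vector<uint8_t>& x,
                                     int64_t N, int64_t C, int64_t H, int64_t W,
                                     int64_t b) {
  const int64_t Cp = C / (b * b);
  std::vector<uint8_t> y(static_cast<size_t>(N * C * H * W));
  for (int64_t n = 0; n < N; ++n)
    for (int64_t c = 0; c < Cp; ++c)
      for (int64_t h = 0; h < H; ++h)
        for (int64_t bi = 0; bi < b; ++bi)
          for (int64_t w = 0; w < W; ++w)
            for (int64_t bj = 0; bj < b; ++bj) {
              const int64_t src_c = (bi * b + bj) * Cp + c;
              const int64_t src = ((n * C + src_c) * H + h) * W + w;
              const int64_t dst =
                  ((n * Cp + c) * (H * b) + h * b + bi) * (W * b) + w * b + bj;
              y[static_cast<size_t>(dst)] = x[static_cast<size_t>(src)];
            }
  return y;
}
```

Since every iteration is a single byte copy, supporting uint8 here needs no numeric handling at all; only the template instantiation and kernel registration change.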
diff --git a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
index 4954b82690e0f..d0620a794e4d5 100644
--- a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
@@ -13,7 +13,7 @@ template <typename T>
 class TensorOpTest : public ::testing::Test {
 };
 
-using TensorOpTestTypes = ::testing::Types<float, MLFloat16>;
+using TensorOpTestTypes = ::testing::Types<float, MLFloat16, uint8_t>;
 TYPED_TEST_SUITE(TensorOpTest, TensorOpTestTypes);
 
 TEST(TensorOpTest, SpaceToDepthTest_1) {
@@ -224,6 +224,7 @@ TEST(TensorOpTest, DepthToSpaceTest_1_double) {
   test.AddOutput<double>("output", {N, C / (blocksize * blocksize), H * blocksize, W * blocksize}, result);
   test.Run();
 }
+
 TEST(TensorOpTest, DepthToSpaceTest_2) {
   OpTester test("DepthToSpace", 7);  // create an opset 7 model
   constexpr int64_t blocksize = 2;
@@ -308,14 +309,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_3) {
   if constexpr (std::is_same<TypeParam, float>::value) {
     test.AddInput<float>("input", {N, C, H, W}, X);
     test.AddOutput<float>("output", {2, 3, 6, 4}, result);
-  } else {
+  } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
     std::vector<MLFloat16> X_fp16(X.size());
     std::vector<MLFloat16> result_fp16(result.size());
-    ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
     ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
-    test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+    ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
     test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
+    test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+  } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+    std::vector<uint8_t> X_u8(X.size());
+    std::vector<uint8_t> result_u8(result.size());
+    ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+    ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+    test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+    test.AddOutput<uint8_t>("output", {2, 3, 6, 4}, result_u8);
+  } else {
+    ORT_THROW("Type not supported");
   }
+
   // TODO: Test is flaky on QNN EP (CPU backend).
   // Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky test is fixed.
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
@@ -363,13 +374,22 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_4) {
   if constexpr (std::is_same<TypeParam, float>::value) {
     test.AddInput<float>("input", {N, C, H, W}, X);
     test.AddOutput<float>("output", {2, 3, 6, 4}, result);
-  } else {
+  } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
     std::vector<MLFloat16> X_fp16(X.size());
     std::vector<MLFloat16> result_fp16(result.size());
     ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
     ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
     test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
     test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+  } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+    std::vector<uint8_t> X_u8(X.size());
+    std::vector<uint8_t> result_u8(result.size());
+    ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+    ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+    test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+    test.AddOutput<uint8_t>("output", {2, 3, 6, 4}, result_u8);
+  } else {
+    ORT_THROW("Type not supported");
   }
 
   // TODO: Test is flaky on QNN EP (CPU backend).
@@ -401,14 +421,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_5) {
   if constexpr (std::is_same<TypeParam, float>::value) {
     test.AddInput<float>("input", {N, C, H, W}, X);
     test.AddOutput<float>("output", {1, 1, 4, 6}, result);
-  } else {
+  } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
     std::vector<MLFloat16> X_fp16(X.size());
     std::vector<MLFloat16> result_fp16(result.size());
     ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
     ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
     test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
     test.AddOutput<MLFloat16>("output", {1, 1, 4, 6}, result_fp16);
+  } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+    std::vector<uint8_t> X_u8(X.size());
+    std::vector<uint8_t> result_u8(result.size());
+    ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+    ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+    test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+    test.AddOutput<uint8_t>("output", {1, 1, 4, 6}, result_u8);
+  } else {
+    ORT_THROW("Type not supported");
   }
+
   // TODO: Test is flaky on QNN EP (CPU backend).
   // Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky2 test is fixed.
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
diff --git a/onnxruntime/test/providers/provider_test_utils.h b/onnxruntime/test/providers/provider_test_utils.h
index 87dc5980e9e50..988df1e7a5d3b 100644
--- a/onnxruntime/test/providers/provider_test_utils.h
+++ b/onnxruntime/test/providers/provider_test_utils.h
@@ -15,6 +15,12 @@ inline void ConvertFloatToMLFloat16(const float* f_datat, MLFloat16* h_data, size_t input_size) {
   output_vector = in_vector.template cast<Eigen::half>();
 }
 
+inline void ConvertFloatToUint8_t(const float* f_datat, uint8_t* u8_data, size_t input_size) {
+  auto in_vector = ConstEigenVectorMap<float>(f_datat, input_size);
+  auto output_vector = EigenVectorMap<uint8_t>(static_cast<uint8_t*>(static_cast<void*>(u8_data)), input_size);
+  output_vector = in_vector.template cast<uint8_t>();
+}
+
 inline void ConvertMLFloat16ToFloat(const MLFloat16* h_data, float* f_data, size_t input_size) {
   auto in_vector = ConstEigenVectorMap<Eigen::half>(static_cast<const Eigen::half*>(static_cast<const void*>(h_data)), input_size);
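`ConvertFloatToUint8_t` mirrors the MLFloat16 helper above it, presumably so the typed tests can reuse the same float fixtures for all three element types. Note that Eigen's `cast<uint8_t>()` applies an element-wise `static_cast`: it truncates toward zero and does not round or clamp, which is exact only for the small non-negative integral values these tests feed it. A standalone sketch of the same conversion, assuming plain Eigen types in place of ORT's `ConstEigenVectorMap`/`EigenVectorMap` aliases:

```cpp
// Standalone equivalent of the new helper (illustrative naming).
#include <Eigen/Core>
#include <cstddef>
#include <cstdint>

inline void ConvertFloatToUint8(const float* f_data, uint8_t* u8_data, size_t n) {
  Eigen::Map<const Eigen::VectorXf> in(f_data, static_cast<Eigen::Index>(n));
  Eigen::Map<Eigen::Matrix<uint8_t, Eigen::Dynamic, 1>> out(u8_data, static_cast<Eigen::Index>(n));
  // Element-wise static_cast<uint8_t>: truncates toward zero, no clamping,
  // so callers must supply values already in [0, 255].
  out = in.cast<uint8_t>();
}
```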