diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index eeb8ebb3ccefe..48682475f2d3a 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -84,8 +84,8 @@ Do not modify directly.*
|||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
|DFT|*in* input:**T1**<br/> *in* dft_length:**T2**<br/> *in* axis:**tensor(int64)**<br/> *out* output:**T1**<br/><br/>or<br/><br/>*in* input:**T1**<br/> *in* dft_length:**T2**<br/> *out* output:**T1**|20+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
|||[17, 19]|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
-|DepthToSpace|*in* input:**T**<br/> *out* output:**T**|13+|**T** = tensor(double), tensor(float)|
-|||[11, 12]|**T** = tensor(double), tensor(float)|
+|DepthToSpace|*in* input:**T**<br/> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(uint8)|
+|||[11, 12]|**T** = tensor(double), tensor(float), tensor(uint8)|
|||[1, 10]|**T** = tensor(double), tensor(float)|
|DequantizeLinear|*in* x:**T**<br/> *in* x_scale:**tensor(float)**<br/> *in* x_zero_point:**T**<br/> *out* y:**tensor(float)**<br/><br/>or<br/><br/>*in* x:**T1**<br/> *in* x_scale:**T2**<br/> *in* x_zero_point:**T1**<br/> *out* y:**T2**|21+|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int4), tensor(int8), tensor(uint16), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
|||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int32), tensor(int8), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
index 2f98711771f1b..ae89af1f256d1 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
@@ -77,7 +77,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) {
true,
cpu_ep);
qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name,
- {{"Resize", {}}},
+ {{"DepthToSpace", {}},
+ {"Resize", {}}},
std::move(selector_no_16bit),
std::move(drop_action_no_int16));
@@ -91,7 +92,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) {
std::move(drop_action_no_int16_and_positive_scale));
std::unique_ptr<NodeSelector> selector = std::make_unique<QDQ::DropQDQNodesSelector>(true, false, true, providers);
- // DepthToSpace and SpaceToDepth not included because there are no integer implementations.
+ // SpaceToDepth not included because there are no integer implementations.
// https://github.com/microsoft/onnxruntime/issues/21287
qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name,
{{"Expand", {}},
diff --git a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
index a23f80671c9ac..7e1049c402210 100644
--- a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
+++ b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc
@@ -43,7 +43,8 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
12,
KernelDefBuilder()
.TypeConstraint("T", {DataTypeImpl::GetTensorType(),
- DataTypeImpl::GetTensorType()}),
+ DataTypeImpl::GetTensorType(),
+ DataTypeImpl::GetTensorType()}),
DepthToSpace);
ONNX_CPU_OPERATOR_KERNEL(
@@ -51,7 +52,8 @@ ONNX_CPU_OPERATOR_KERNEL(
13,
KernelDefBuilder()
.TypeConstraint("T", {DataTypeImpl::GetTensorType(),
- DataTypeImpl::GetTensorType()}),
+ DataTypeImpl::GetTensorType(),
+ DataTypeImpl::GetTensorType()}),
DepthToSpace);
// intermediate tensor shapes are:
@@ -196,6 +198,19 @@ Status DepthToSpace::Compute(OpKernelContext* context) const {
onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
onnxruntime::narrow<std::ptrdiff_t>(input_width),
onnxruntime::narrow<std::ptrdiff_t>(blocksize_));
+ } else if (input.IsDataType<uint8_t>()) {
+ SpaceDepthOpCpuImpl<uint8_t>(input, output, permutation,
+ onnxruntime::narrow<std::ptrdiff_t>(batch),
+ onnxruntime::narrow<std::ptrdiff_t>(dim1),
+ onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
+ onnxruntime::narrow<std::ptrdiff_t>(dim3),
+ onnxruntime::narrow<std::ptrdiff_t>(input_height),
+ onnxruntime::narrow<std::ptrdiff_t>(input_width),
+ onnxruntime::narrow<std::ptrdiff_t>(input_depth / blocksize_ / blocksize_),
+ onnxruntime::narrow<std::ptrdiff_t>(input_height),
+ onnxruntime::narrow<std::ptrdiff_t>(blocksize_),
+ onnxruntime::narrow<std::ptrdiff_t>(input_width),
+ onnxruntime::narrow<std::ptrdiff_t>(blocksize_));
} else {
// user will not see this as the kernel doesn't claim support for types other than float and double
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported input type in DepthToSpace op: ", input.DataType());
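
The new branch simply reuses the same 6D view-and-transpose helper as the float and double paths. Conceptually, DepthToSpace in DCR mode views the input as [N, b, b, C/(b*b), H, W] and permutes it to [N, C/(b*b), H, b, W, b]. A standalone reference sketch of that rearrangement for uint8 (hypothetical function, not the ORT implementation):

```cpp
#include <cstdint>
#include <vector>

// Reference DCR DepthToSpace: input [N, C, H, W] -> output [N, C/(b*b), H*b, W*b].
std::vector<uint8_t> DepthToSpaceDCR(const std::vector<uint8_t>& x,
                                     int64_t N, int64_t C, int64_t H, int64_t W,
                                     int64_t b) {
  const int64_t oc = C / (b * b);
  std::vector<uint8_t> y(x.size());
  for (int64_t n = 0; n < N; ++n)
    for (int64_t c = 0; c < oc; ++c)
      for (int64_t h = 0; h < H; ++h)
        for (int64_t bh = 0; bh < b; ++bh)
          for (int64_t w = 0; w < W; ++w)
            for (int64_t bw = 0; bw < b; ++bw) {
              // Input viewed as [N, b, b, oc, H, W]; element lands at
              // output position [n, c, h*b + bh, w*b + bw].
              const int64_t src = ((((n * b + bh) * b + bw) * oc + c) * H + h) * W + w;
              const int64_t dst = ((n * oc + c) * (H * b) + (h * b + bh)) * (W * b) + (w * b + bw);
              y[static_cast<size_t>(dst)] = x[static_cast<size_t>(src)];
            }
  return y;
}
```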
diff --git a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
index 4954b82690e0f..d0620a794e4d5 100644
--- a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc
@@ -13,7 +13,7 @@ template <typename T>
class TensorOpTest : public ::testing::Test {
};
-using TensorOpTestTypes = ::testing::Types<float, MLFloat16>;
+using TensorOpTestTypes = ::testing::Types<float, MLFloat16, uint8_t>;
TYPED_TEST_SUITE(TensorOpTest, TensorOpTestTypes);
TEST(TensorOpTest, SpaceToDepthTest_1) {
@@ -224,6 +224,7 @@ TEST(TensorOpTest, DepthToSpaceTest_1_double) {
test.AddOutput("output", {N, C / (blocksize * blocksize), H * blocksize, W * blocksize}, result);
test.Run();
}
+
TEST(TensorOpTest, DepthToSpaceTest_2) {
OpTester test("DepthToSpace", 7); // create an opset 7 model
constexpr int64_t blocksize = 2;
@@ -308,14 +309,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_3) {
if constexpr (std::is_same<TypeParam, float>::value) {
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {2, 3, 6, 4}, result);
- } else {
+ } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
std::vector<MLFloat16> X_fp16(X.size());
std::vector<MLFloat16> result_fp16(result.size());
- ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
- test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+ ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
+ test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+ } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+ std::vector<uint8_t> X_u8(X.size());
+ std::vector<uint8_t> result_u8(result.size());
+ ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+ ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+ test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+ test.AddOutput<uint8_t>("output", {2, 3, 6, 4}, result_u8);
+ } else {
+ ORT_THROW("Type not supported");
}
+
// TODO: Test is flaky on QNN EP (CPU backend).
// Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky test is fixed.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
@@ -363,13 +374,22 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_4) {
if constexpr (std::is_same<TypeParam, float>::value) {
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {2, 3, 6, 4}, result);
- } else {
+ } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
std::vector<MLFloat16> X_fp16(X.size());
std::vector<MLFloat16> result_fp16(result.size());
ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
test.AddOutput<MLFloat16>("output", {2, 3, 6, 4}, result_fp16);
+ } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+ std::vector<uint8_t> X_u8(X.size());
+ std::vector<uint8_t> result_u8(result.size());
+ ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+ ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+ test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+ test.AddOutput<uint8_t>("output", {2, 3, 6, 4}, result_u8);
+ } else {
+ ORT_THROW("Type not supported");
}
// TODO: Test is flaky on QNN EP (CPU backend).
@@ -401,14 +421,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_5) {
if constexpr (std::is_same<TypeParam, float>::value) {
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {1, 1, 4, 6}, result);
- } else {
+ } else if constexpr (std::is_same<TypeParam, MLFloat16>::value) {
std::vector<MLFloat16> X_fp16(X.size());
std::vector<MLFloat16> result_fp16(result.size());
ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size());
ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size());
test.AddInput<MLFloat16>("input", {N, C, H, W}, X_fp16);
test.AddOutput<MLFloat16>("output", {1, 1, 4, 6}, result_fp16);
+ } else if constexpr (std::is_same<TypeParam, uint8_t>::value) {
+ std::vector<uint8_t> X_u8(X.size());
+ std::vector<uint8_t> result_u8(result.size());
+ ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size());
+ ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size());
+ test.AddInput<uint8_t>("input", {N, C, H, W}, X_u8);
+ test.AddOutput<uint8_t>("output", {1, 1, 4, 6}, result_u8);
+ } else {
+ ORT_THROW("Type not supported");
}
+
// TODO: Test is flaky on QNN EP (CPU backend).
// Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky2 test is fixed.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
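
The typed tests above reach the uint8 path by converting float reference data. A direct uint8 case is also easy to write with OpTester; a minimal sketch (hypothetical test name, not part of the patch, assuming the usual onnxruntime::test harness includes and namespaces), with values chosen so the DCR result is verifiable by hand:

```cpp
TEST(TensorOpTest, DepthToSpaceTest_Uint8_Direct) {
  OpTester test("DepthToSpace", 13);  // opset-13 kernel registered above
  constexpr int64_t blocksize = 2;
  test.AddAttribute("blocksize", blocksize);
  test.AddAttribute("mode", "DCR");

  // 1x4x1x1 -> 1x1x2x2: the four channels become one 2x2 spatial block.
  test.AddInput<uint8_t>("input", {1, 4, 1, 1}, {0, 1, 2, 3});
  test.AddOutput<uint8_t>("output", {1, 1, 2, 2}, {0, 1, 2, 3});
  test.Run();
}
```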
diff --git a/onnxruntime/test/providers/provider_test_utils.h b/onnxruntime/test/providers/provider_test_utils.h
index 87dc5980e9e50..988df1e7a5d3b 100644
--- a/onnxruntime/test/providers/provider_test_utils.h
+++ b/onnxruntime/test/providers/provider_test_utils.h
@@ -15,6 +15,12 @@ inline void ConvertFloatToMLFloat16(const float* f_datat, MLFloat16* h_data, siz
output_vector = in_vector.template cast<Eigen::half>();
}
+inline void ConvertFloatToUint8_t(const float* f_datat, uint8_t* u8_data, size_t input_size) {
+ auto in_vector = ConstEigenVectorMap<float>(f_datat, input_size);
+ auto output_vector = EigenVectorMap<uint8_t>(static_cast<uint8_t*>(static_cast<void*>(u8_data)), input_size);
+ output_vector = in_vector.template cast<uint8_t>();
+}
+
inline void ConvertMLFloat16ToFloat(const MLFloat16* h_data, float* f_data, size_t input_size) {
auto in_vector =
ConstEigenVectorMap<Eigen::half>(static_cast<const Eigen::half*>(static_cast<const void*>(h_data)), input_size);
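
The new helper mirrors ConvertFloatToMLFloat16 and relies on Eigen's per-element cast, i.e. a static_cast per value: fractional inputs are truncated and out-of-range inputs are undefined behavior in C++, which is why the DepthToSpace tests above only feed it whole numbers in [0, 255]. A tiny usage sketch, assuming this header is included (demo function name is illustrative):

```cpp
#include <cstdint>
#include <vector>

void ConvertFloatToUint8_tDemo() {
  const std::vector<float> f = {0.0f, 1.0f, 42.0f, 255.0f};
  std::vector<uint8_t> u8(f.size());
  ConvertFloatToUint8_t(f.data(), u8.data(), f.size());
  // u8 now holds {0, 1, 42, 255}.
}
```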