diff --git a/onnxruntime/test/mlas/unittest/test_reorder_output.cpp b/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
index 704333fd27fa0..4dc64c87b95ed 100644
--- a/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
+++ b/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
@@ -3,6 +3,9 @@
 
 #include "test_util.h"
 
+#include <chrono>
+#include <iostream>
+
 class MlasReorderOutputTest : public MlasTestBase {
  private:
   const size_t BlockSize = MlasNchwcGetBlockSize();
@@ -27,7 +30,13 @@ class MlasReorderOutputTest : public MlasTestBase {
     std::fill_n(Output, OutputBufferElements, -0.5f);
     std::fill_n(OutputReference, OutputBufferElements, -0.5f);
 
+    // Time only the MlasReorderOutputNchw call; steady_clock is monotonic.
+    const auto time_start = std::chrono::steady_clock::now();
     MlasReorderOutputNchw(NchwOutputShape, Input, Output, GetMlasThreadPool());
+    const auto time_end = std::chrono::steady_clock::now();
+    const auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(time_end - time_start).count();
+    std::cout << BatchCount << ":" << Channels << ":" << Height << ":" << Width << "--" << elapsed_us << "us" << '\n';
+
     ReferenceReorderOutput(BatchCount, Channels, Height, Width, Input, OutputReference, false);
     ASSERT_EQ(memcmp(Output, OutputReference, OutputBufferElements * sizeof(float)), 0)
         << " [Nchw] batch=" << BatchCount << ", channels=" << Channels
@@ -80,10 +89,23 @@ class MlasReorderOutputTest : public MlasTestBase {
   }
 
   void ExecuteShort(void) override {
+    // Sweep every channel count from 1 through 47 over three fixed shapes.
     for (size_t c = 1; c < 48; c++) {
       Test(1, c, 112, 112);
       Test(4, c, 15, 21);
       Test(16, c, 11, 11);
     }
+
+    // Repeat a fixed set of larger shapes 100 times; every Test call prints the
+    // elapsed time of its MlasReorderOutputNchw invocation.
+    for (int i = 0; i < 100; i++) {
+      Test(1, 1, 112, 112);
+      Test(1, 64, 320, 168);
+      Test(1, 128, 160, 84);
+      Test(13, 96, 4, 314);
+      Test(13, 240, 4, 314);
+      Test(30, 336, 4, 140);
+      Test(30, 240, 4, 140);
+    }
   }
 };