Skip to content

Commit

Permalink
Add medium and small problem sizes for GPU targets
Browse files Browse the repository at this point in the history
Signed-off-by: erman-gurses <[email protected]>
  • Loading branch information
erman-gurses committed Oct 24, 2024
1 parent 427f406 commit 5295314
Show file tree
Hide file tree
Showing 14 changed files with 1,020 additions and 9 deletions.
6 changes: 0 additions & 6 deletions linalg_ops/convolution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ endforeach()
# To distinguish between CDNA(gfx9) and RDNA3(gfx11)
if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9")

set(_SIZES)
list(APPEND _SIZES "large")

set(_DTYPES_AND_LAYOUTS)
list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")
list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32")
Expand Down Expand Up @@ -117,9 +114,6 @@ endforeach()

elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11")

set(_SIZES)
list(APPEND _SIZES "large")

set(_DTYPES_AND_LAYOUTS)
list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")

Expand Down
3 changes: 0 additions & 3 deletions linalg_ops/convolution/generate_test_mlir_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ for type_combination in ${type_combinations[@]}; do
done
done

shapes=(
"large"
)
# input_type;input_layout;kernel_type;kernel_layout;acc_type
type_and_layout_combinations=(
"f16;nhwc;f16;hwcf;f32"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Generated conv2d-with-accumulator test functions (medium sizes), NCHW input /
// FCHW kernel layout, f16 operands accumulating into f32.
// All cases use stride [1, 1] and dilation [1, 1], so the output spatial dims
// are H-KH+1 x W-KW+1 (32-3+1 = 30 here).

// Case N=2 C=2 H=W=32, F=2 KH=KW=3 -> 2x2x30x30 f32 output.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32>
return %result: tensor<2x2x30x30xf32>
}
// Case N=2 C=2 H=W=32, F=64 KH=KW=3 -> 2x64x30x30 f32 output.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32>
return %result: tensor<2x64x30x30xf32>
}
// Case N=2 C=32 H=W=32, F=64 KH=KW=3 -> 2x64x30x30 f32 output.
func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32>
return %result: tensor<2x64x30x30xf32>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Generated test-driver module: for each conv2d case it (1) generates random
// input, kernel, and accumulator tensors via @conv2d_test.generate_random_tensor,
// (2) invokes the corresponding @module.conv2d_accumulate_* function under test,
// and (3) verifies the result with @conv2d_test.check_conv2d_results.
// NOTE(review): %acc and %acc_copy are generated with identical dims and seed,
// so they hold the same values — %acc_copy is handed to the computation while
// %acc is kept for the reference check; presumably this guards against the
// callee consuming/mutating its accumulator buffer — confirm against the runner.
builtin.module @calls attributes {

} {

// External helpers supplied by the conv2d test runner.
func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
// Functions under test, compiled into the @module being exercised.
func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view

// Test 0: N=2 C=2 H=W=32, F=2 KH=KW=3, stride 1, dilation 1.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Random 2x2x32x32 f16 input (seed 2).
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 2 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 2 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Random 2x2x3x3 f16 kernel (seed 3).
%kernel_dim0 = arith.constant 2 : i64
%kernel_dim1 = arith.constant 2 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 3 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Random 2x2x30x30 f32 accumulator (seed 4) plus a same-seed duplicate.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 2 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 4 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 2 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 4 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Run the function under test, then check against the pristine %acc.
%result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 2 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 2 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
// layout discriminator (0 here — presumably NCHW; confirm in check_conv2d_results).
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Test 1: N=2 C=2 H=W=32, F=64 KH=KW=3, stride 1, dilation 1 (seeds 5/6/7).
func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 2 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 5 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%kernel_dim0 = arith.constant 64 : i64
%kernel_dim1 = arith.constant 2 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 6 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 64 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 7 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 64 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 7 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 2 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 64 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Test 2: N=2 C=32 H=W=32, F=64 KH=KW=3, stride 1, dilation 1 (seeds 8/9/10).
func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 32 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 8 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%kernel_dim0 = arith.constant 64 : i64
%kernel_dim1 = arith.constant 32 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 9 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 64 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 10 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 64 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 10 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 32 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 64 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Generated conv2d-with-accumulator test functions (small sizes), NCHW input /
// FCHW kernel layout, f16 operands accumulating into f32.
// All cases use stride [1, 1] and dilation [1, 1], so the output spatial dims
// are H-KH+1 x W-KW+1.

// Degenerate case: 1x1x1x1 input and kernel -> 1x1x1x1 f32 output.
func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
return %result: tensor<1x1x1x1xf32>
}
// Case N=1 C=1 H=W=16, F=1 KH=KW=2 -> 1x1x15x15 f32 output (16-2+1 = 15).
func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32>
return %result: tensor<1x1x15x15xf32>
}
// Case N=2 C=2 H=W=32, F=2 KH=KW=3 -> 2x2x30x30 f32 output (32-3+1 = 30).
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32>
return %result: tensor<2x2x30x30xf32>
}
Loading

0 comments on commit 5295314

Please sign in to comment.