Skip to content

Commit

Permalink
Add medium and small problem sizes for GPU targets
Browse files Browse the repository at this point in the history
Signed-off-by: erman-gurses <[email protected]>
  • Loading branch information
erman-gurses committed Oct 24, 2024
1 parent 427f406 commit 5295314
Show file tree
Hide file tree
Showing 14 changed files with 1,020 additions and 9 deletions.
6 changes: 0 additions & 6 deletions linalg_ops/convolution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ endforeach()
# To distinguish between CDNA(gfx9) and RDNA3(gfx11)
if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9")

set(_SIZES)
list(APPEND _SIZES "large")

set(_DTYPES_AND_LAYOUTS)
list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")
list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32")
Expand Down Expand Up @@ -117,9 +114,6 @@ endforeach()

elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11")

set(_SIZES)
list(APPEND _SIZES "large")

set(_DTYPES_AND_LAYOUTS)
list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")

Expand Down
3 changes: 0 additions & 3 deletions linalg_ops/convolution/generate_test_mlir_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ for type_combination in ${type_combinations[@]}; do
done
done

shapes=(
"large"
)
# input_type;input_layout;kernel_type;kernel_layout;acc_type
type_and_layout_combinations=(
"f16;nhwc;f16;hwcf;f32"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Generated conv2d-with-accumulator test functions (medium sizes), NCHW input /
// FCHW kernel layout, f16 operands accumulating into f32.
// All cases use stride [1, 1] and dilation [1, 1], so the output spatial dims
// are H-KH+1 x W-KW+1 (32-3+1 = 30 here).

// Case N=2 C=2 H=W=32, F=2 KH=KW=3 -> 2x2x30x30 f32 output.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32>
return %result: tensor<2x2x30x30xf32>
}
// Case N=2 C=2 H=W=32, F=64 KH=KW=3 -> 2x64x30x30 f32 output.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32>
return %result: tensor<2x64x30x30xf32>
}
// Case N=2 C=32 H=W=32, F=64 KH=KW=3 -> 2x64x30x30 f32 output.
func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32>
return %result: tensor<2x64x30x30xf32>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Generated test-driver module: for each conv2d case it (1) generates random
// input, kernel, and accumulator tensors via @conv2d_test.generate_random_tensor,
// (2) invokes the corresponding @module.conv2d_accumulate_* function under test,
// and (3) verifies the result with @conv2d_test.check_conv2d_results.
// NOTE(review): %acc and %acc_copy are generated with identical dims and seed,
// so they hold the same values — %acc_copy is handed to the computation while
// %acc is kept for the reference check; presumably this guards against the
// callee consuming/mutating its accumulator buffer — confirm against the runner.
builtin.module @calls attributes {

} {

// External helpers supplied by the conv2d test runner.
func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
// Functions under test, compiled into the @module being exercised.
func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view

// Test 0: N=2 C=2 H=W=32, F=2 KH=KW=3, stride 1, dilation 1.
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Random 2x2x32x32 f16 input (seed 2).
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 2 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 2 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Random 2x2x3x3 f16 kernel (seed 3).
%kernel_dim0 = arith.constant 2 : i64
%kernel_dim1 = arith.constant 2 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 3 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Random 2x2x30x30 f32 accumulator (seed 4) plus a same-seed duplicate.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 2 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 4 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 2 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 4 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Run the function under test, then check against the pristine %acc.
%result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 2 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 2 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
// layout discriminator (0 here — presumably NCHW; confirm in check_conv2d_results).
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Test 1: N=2 C=2 H=W=32, F=64 KH=KW=3, stride 1, dilation 1 (seeds 5/6/7).
func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 2 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 5 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%kernel_dim0 = arith.constant 64 : i64
%kernel_dim1 = arith.constant 2 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 6 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 64 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 7 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 64 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 7 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 2 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 64 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Test 2: N=2 C=32 H=W=32, F=64 KH=KW=3, stride 1, dilation 1 (seeds 8/9/10).
func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 32 : i64
%input_dim2 = arith.constant 32 : i64
%input_dim3 = arith.constant 32 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 8 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%kernel_dim0 = arith.constant 64 : i64
%kernel_dim1 = arith.constant 32 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 9 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 64 : i64
%acc_dim2 = arith.constant 30 : i64
%acc_dim3 = arith.constant 30 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 10 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 64 : i64
%acc_copy_dim2 = arith.constant 30 : i64
%acc_copy_dim3 = arith.constant 30 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 10 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 32 : i64
%h = arith.constant 32 : i64
%w = arith.constant 32 : i64
%f = arith.constant 64 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Generated conv2d-with-accumulator test functions (small sizes), NCHW input /
// FCHW kernel layout, f16 operands accumulating into f32.
// All cases use stride [1, 1] and dilation [1, 1], so the output spatial dims
// are H-KH+1 x W-KW+1.

// Degenerate case: 1x1x1x1 input and kernel -> 1x1x1x1 f32 output.
func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
return %result: tensor<1x1x1x1xf32>
}
// Case N=1 C=1 H=W=16, F=1 KH=KW=2 -> 1x1x15x15 f32 output (16-2+1 = 15).
func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32>
return %result: tensor<1x1x15x15xf32>
}
// Case N=2 C=2 H=W=32, F=2 KH=KW=3 -> 2x2x30x30 f32 output (32-3+1 = 30).
func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32>
return %result: tensor<2x2x30x30xf32>
}
Loading

0 comments on commit 5295314

Please sign in to comment.