From fc08f821b9e94aadff80616fa97d76bef5f16b1b Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 00:36:02 -0500 Subject: [PATCH 01/16] Add integration for the conv2d (CPU) test suite. Signed-off-by: erman-gurses --- linalg_ops/CMakeLists.txt | 21 + linalg_ops/convolution/CMakeLists.txt | 112 +++ .../convolution/generate_e2e_conv2d_tests.py | 693 ++++++++++++++++ .../convolution/generate_test_mlir_files.sh | 105 +++ .../f16_f16_f16/conv2d_f16_f16_f16_large.mlir | 10 + .../conv2d_f16_f16_f16_large_calls.mlir | 112 +++ .../conv2d_f16_f16_f16_medium.mlir | 15 + .../conv2d_f16_f16_f16_medium_calls.mlir | 163 ++++ .../f16_f16_f16/conv2d_f16_f16_f16_small.mlir | 15 + .../conv2d_f16_f16_f16_small_calls.mlir | 163 ++++ .../conv2d_winograd_f16_f16_f16_large.mlir | 10 + ...nv2d_winograd_f16_f16_f16_large_calls.mlir | 112 +++ .../conv2d_winograd_f16_f16_f16_medium.mlir | 15 + ...v2d_winograd_f16_f16_f16_medium_calls.mlir | 163 ++++ .../conv2d_winograd_f16_f16_f16_small.mlir | 15 + ...nv2d_winograd_f16_f16_f16_small_calls.mlir | 163 ++++ .../f32_f32_f32/conv2d_f32_f32_f32_large.mlir | 10 + .../conv2d_f32_f32_f32_large_calls.mlir | 112 +++ .../conv2d_f32_f32_f32_medium.mlir | 15 + .../conv2d_f32_f32_f32_medium_calls.mlir | 163 ++++ .../f32_f32_f32/conv2d_f32_f32_f32_small.mlir | 15 + .../conv2d_f32_f32_f32_small_calls.mlir | 163 ++++ .../conv2d_winograd_f32_f32_f32_large.mlir | 10 + ...nv2d_winograd_f32_f32_f32_large_calls.mlir | 112 +++ .../conv2d_winograd_f32_f32_f32_medium.mlir | 15 + ...v2d_winograd_f32_f32_f32_medium_calls.mlir | 163 ++++ .../conv2d_winograd_f32_f32_f32_small.mlir | 15 + ...nv2d_winograd_f32_f32_f32_small_calls.mlir | 163 ++++ linalg_ops/iree-e2e-conv2d-test.cc | 775 ++++++++++++++++++ linalg_ops/test_utils.c | 6 +- linalg_ops/test_utils.h | 1 + 31 files changed, 3621 insertions(+), 4 deletions(-) create mode 100644 linalg_ops/convolution/CMakeLists.txt create mode 100644 linalg_ops/convolution/generate_e2e_conv2d_tests.py create mode 100755 linalg_ops/convolution/generate_test_mlir_files.sh create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir create mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir create mode 100644 
linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir create mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir create mode 100644 linalg_ops/iree-e2e-conv2d-test.cc diff --git a/linalg_ops/CMakeLists.txt b/linalg_ops/CMakeLists.txt index 08cf318..c6a14aa 100644 --- a/linalg_ops/CMakeLists.txt +++ b/linalg_ops/CMakeLists.txt @@ -114,6 +114,26 @@ iree_cc_binary( iree::vm::cc ) +iree_cc_binary( + NAME + iree-e2e-conv2d-test + SRCS + "iree-e2e-conv2d-test.cc" + DEPS + ::test_utils + iree::base + iree::base::internal + iree::base::internal::cpu + iree::base::internal::flags + iree::base::internal::path + iree::hal + iree::modules::hal + iree::tooling::context_util + iree::tooling::device_util + iree::vm + iree::vm::cc +) + #------------------------------------------------------------------------------- # Tests #------------------------------------------------------------------------------- @@ -123,3 +143,4 @@ include(iree_test_suites_native_test) include(iree_test_suites_runner_test) add_subdirectory(matmul) +add_subdirectory(convolution) \ No newline at end of file diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt new file mode 100644 index 0000000..e9a57b5 --- /dev/null +++ b/linalg_ops/convolution/CMakeLists.txt @@ -0,0 +1,112 @@ +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# TODO(scotttodd): add filtering here, in the helper functions, or in ctest to +# choose which tests to compile and run + +set(_SIZES) +list(APPEND _SIZES "large") +list(APPEND _SIZES "medium") +list(APPEND _SIZES "small") + +############################################################################### +# +# CPU - llvm-cpu on local-task, default flags. 
+# +############################################################################### + + +set(_DTYPES) +list(APPEND _DTYPES "f16_f16_f16") +list(APPEND _DTYPES "f32_f32_f32") + +foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + RUNNER_FLAGS + LABELS + "hostonly" + "local" + ) + endforeach() +endforeach() + +foreach(_DTYPE IN LISTS _DTYPES) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_winograd_${_DTYPE}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" + RUNNER_FLAGS + LABELS + "hostonly" + "local" + TARGET_CPU_FEATURES_VARIANTS + "default" + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, default flags. +# +############################################################################### + +# if(IREE_HIP_TEST_TARGET_CHIP) + +# set(_DTYPES) +# list(APPEND _DTYPES "f16_f16_f16") +# list(APPEND _DTYPES "f32_f32_f32") + +# foreach(_DTYPE IN LISTS _DTYPES) +# foreach(_SIZE IN LISTS _SIZES) +# iree_test_suites_runner_test( +# NAME +# matmul_hip_${_DTYPE}_${_SIZE} +# TESTS_SRC +# "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" +# CALLS_SRC +# "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" +# TEST_RUNNER +# iree-test-suites_iree-e2e-matmul-test +# TARGET_BACKEND +# "rocm" +# DRIVER +# "hip" +# COMPILER_FLAGS +# "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" +# RUNNER_FLAGS +# LABELS +# ) +# endforeach() +# endforeach() + +# endif() \ No newline at end of file diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py new file mode 100644 index 0000000..a492eb4 --- /dev/null +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -0,0 +1,693 @@ +#!/usr/bin/env python3 +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""Generator for e2e conv2d tests. +""" + +from typing import Optional +import argparse +import enum +import dataclasses +import typing +import math +import itertools +import re + +# Data type of kernel entries. The string values must match MLIR data types. +@enum.unique +class KernelElemTypeId(enum.Enum): + NONE = "" + I8 = "i8" + F32 = "f32" + F16 = "f16" + + +# Data type of input entries. The string values must match MLIR data types. +@enum.unique +class InputElemTypeId(enum.Enum): + NONE = "" + I8 = "i8" + F32 = "f32" + F16 = "f16" + + +# Data type of accumulator entries. The string values must match MLIR data types. +@enum.unique +class AccElemTypeId(enum.Enum): + NONE = "" + I32 = "i32" + F32 = "f32" + F16 = "f16" + +# Enumerates the collections of shapes that we can generate tests for.
+# The values are the accepted values for the --shapes= flag. +@enum.unique +class ShapesId(enum.Enum): + SMALL = "small" + MEDIUM = "medium" + LARGE = "large" + + +# Enumerates ways to construct MLIR tensor types. +# TODO: Enable dynamic dimensions once the tests start passing. +@enum.unique +class Dynamicity(enum.Enum): + DYNAMIC = "dynamic" # Use '?' everywhere. Example: tensor<?x?xf32>. + STATIC = "static" # Use fixed values everywhere. Example: tensor<4x6xf32>. + MIXED = "mixed" # Randomly mix '?' and values. Example: tensor<?x4xf32>. + + +# TODO: Add more input layouts as needed. The layout determines the dimension order of the input and kernel tensors. +@enum.unique +class InputLayout(enum.Enum): + NCHW = "nchw" + NHWC = "nhwc" + + +# TODO: Add more kernel layouts as needed. +@enum.unique +class KernelLayout(enum.Enum): + FCHW = "fchw" + HWCF = "hwcf" + + +# Describes the shape of a conv2d test case in the usual convention: +# the input is {n}x{c}x{h}x{w}, the kernel is {f}x{c}x{kh}x{kw}, the accumulator/result is +# {n}x{f}x{oh}x{ow}. +# The extra `accumulate` boolean tells whether the conv2d is accumulating into +# an existing accumulator (C += conv(A, B)) or just overwriting the result +# (C = conv(A, B)). +@dataclasses.dataclass +class TestShape: + n: int + c: int + h: int + w: int + kh: int + kw: int + f: int + accumulate: bool + + +# Attributes for the linalg.conv2d operation. +@dataclasses.dataclass +class ConvAttrs: + STRIDE: typing.Tuple[int, int] = (1, 1) + DILATION: typing.Tuple[int, int] = (1, 1) + + +# Returns the list of TestShape's to use for the collection of shapes +# identified by shapes_id. +def get_test_shapes(shapes_id: ShapesId): + # Notes: + # 1. Be conservative in adding more shapes, as that can increase both the + # build and execution latency of tests. The build latency is nearly the + # same for all shapes, while execution latency grows linearly with + # n*f*ow*oh*kh*kw. + + if shapes_id == ShapesId.SMALL: + return [ + TestShape(n=1, c=1, h=1, w=1, kh=1, kw=1, f=1, accumulate=True), + TestShape(n=1, c=1, h=16, w=16, kh=2, kw=2, f=1, accumulate=True), + TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), + ] + if shapes_id == ShapesId.MEDIUM: + return [ + TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), + TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), + TestShape(n=2, c=32, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), + ] + if shapes_id == ShapesId.LARGE: + return [ + TestShape(n=2, c=4, h=128, w=128, kh=3, kw=3, f=8, accumulate=True), + TestShape(n=2, c=3, h=128, w=128, kh=3, kw=3, f=12, accumulate=True), + ] + + raise ValueError(shapes_id) + + +# A shape dimension value, i.e. a size value that could appear in a MLIR type +# such as 'tensor<?x?xf32>'. None means a dynamic size, similar to '?' in MLIR. +@dataclasses.dataclass +class DimSize: + value: typing.Optional[int] + + +# Generates a compile-time MLIR size value, i.e. either a fixed positive integer +# or None (which maps to MLIR '?') depending on dynamicity. +def shape_dim(x: int, dynamicity: Dynamicity): + if dynamicity == Dynamicity.DYNAMIC: + return DimSize(None) + elif dynamicity == Dynamicity.STATIC: + return DimSize(x) + else: + raise ValueError(dynamicity) + + +# Stringification used for generating MLIR types, e.g. tensor<?x?xf32>. +def int_or_question_mark(s: DimSize): + return s.value or "?" + + +# Stringification used for generating alphanumeric identifiers, e.g. +# func.func @somefunction_DYNxDYNxf32, where we can't use "?" characters.
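+# (Note: with dynamicity=STATIC every DimSize carries its integer and
+# stringifies as e.g. "4" in both helpers; with DYNAMIC it carries None and
+# stringifies as "?" in types via int_or_question_mark above, and as "DYN" in
+# identifiers via int_or_DYN below.)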
+def int_or_DYN(s: DimSize): + return s.value or "DYN" + + +# Determines the shape of input and kernel tensors. +@dataclasses.dataclass +class TestInputTensorShapes: + n: DimSize + c: DimSize + h: DimSize + w: DimSize + kh: DimSize + kw: DimSize + f: DimSize + + +# Helper for generate_function. Generates TestInputTensorShapes, i.e. +# converts the runtime shape dimensions in TestShape, for the given dynamicity, into +# the set of shapes to be used in a test function's input tensors. +def generate_shapes(shape: TestShape, dynamicity: Dynamicity): + n = shape_dim(shape.n, dynamicity) + c = shape_dim(shape.c, dynamicity) + h = shape_dim(shape.h, dynamicity) + w = shape_dim(shape.w, dynamicity) + kh = shape_dim(shape.kh, dynamicity) + kw = shape_dim(shape.kw, dynamicity) + f = shape_dim(shape.f, dynamicity) + shapes = TestInputTensorShapes( + n=n, + c=c, + h=h, + w=w, + kh=kh, + kw=kw, + f=f, + ) + return shapes + + +# Helper to calculate the output spatial size based on the input size, kernel size, +# dilation and stride: floor((i - 1 - (k - 1) * d) / s) + 1. +def calc_out_shape(i_shape: int, k_shape: int, dilation_val: int, stride_val: int): + x = (k_shape - 1) * (dilation_val - 1) + x = i_shape - k_shape - x + return math.floor(x / stride_val) + 1 + + +# Helper to return input, kernel and output shapes based on the layouts, TestShape and ConvAttrs. +def get_tensor_shape( + shapes: TestShape, + kernel_layout: KernelLayout, + input_layout: InputLayout, + conv_attr: ConvAttrs, +): + n = shapes.n + c = shapes.c + h = shapes.h + w = shapes.w + kh = shapes.kh + kw = shapes.kw + f = shapes.f + + # Extract input dimensions + input_height, input_width = h, w + + # Extract kernel dimensions + kernel_height, kernel_width = kh, kw + + # Get the dilation and stride + dilation = conv_attr.DILATION + stride = conv_attr.STRIDE + + # Calculate output height. + oh = calc_out_shape(input_height, kernel_height, dilation[0], stride[0]) + # Calculate output width. + ow = calc_out_shape(input_width, kernel_width, dilation[1], stride[1]) + + input_tensor_shape, kernel_tensor_shape, output_tensor_shape = [], [], [] + + if input_layout == InputLayout.NCHW: + input_tensor_shape = [n, c, h, w] + output_tensor_shape = [n, f, oh, ow] + elif input_layout == InputLayout.NHWC: + input_tensor_shape = [n, h, w, c] + output_tensor_shape = [n, oh, ow, f] + else: + raise ValueError(input_layout) + + if kernel_layout == KernelLayout.FCHW: + kernel_tensor_shape = [f, c, kh, kw] + elif kernel_layout == KernelLayout.HWCF: + kernel_tensor_shape = [kh, kw, c, f] + else: + raise ValueError(kernel_layout) + + return input_tensor_shape, kernel_tensor_shape, output_tensor_shape + + +# Helper for generate_function. +# Generates a name for a test function in the generated MLIR code. +def generate_function_name( + input_type: InputElemTypeId, + kernel_type: KernelElemTypeId, + output_type: AccElemTypeId, + shapes: TestInputTensorShapes, + accumulate: bool, +): + input_t = input_type.value + kernel_t = kernel_type.value + acc_t = output_type.value + n = int_or_DYN(shapes.n) + c = int_or_DYN(shapes.c) + h = int_or_DYN(shapes.h) + w = int_or_DYN(shapes.w) + kh = int_or_DYN(shapes.kh) + kw = int_or_DYN(shapes.kw) + f = int_or_DYN(shapes.f) + + conv2d_kind = "conv2d_accumulate" if accumulate else "conv2d" + return ( + f"{conv2d_kind}_{n}_{c}_{h}_{w}_times_" + + f"{kh}_{kw}_{f}_dtype_{input_t}_{kernel_t}_{acc_t}" + ) + + +# Represents a generated test function.
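As a sanity check on calc_out_shape and get_tensor_shape, here is a minimal standalone sketch (plain Python, separate from the patch; the out_size helper name is ours) that reproduces the unit-stride, unit-dilation output sizes appearing in the generated .mlir files further down:

    import math

    def out_size(i, k, dilation=1, stride=1):
        # Same arithmetic as calc_out_shape: floor((i - 1 - (k - 1) * dilation) / stride) + 1.
        x = (k - 1) * (dilation - 1)
        return math.floor((i - k - x) / stride) + 1

    # LARGE shape TestShape(n=2, c=4, h=128, w=128, kh=3, kw=3, f=8) in NCHW/FCHW:
    # input [2, 4, 128, 128], kernel [8, 4, 3, 3], output [2, 8, 126, 126].
    assert out_size(128, 3) == 126  # matches tensor<2x8x126x126xf16> below
    # SMALL shape TestShape(n=1, c=1, h=16, w=16, kh=2, kw=2, f=1):
    assert out_size(16, 2) == 15    # matches tensor<1x1x15x15xf16> below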
+@dataclasses.dataclass +class MLIRFunction: + name: str + signature: str + import_declaration: str + definition: str + + +# Generates a test function in the generated MLIR code. +# The generated function will take the same arguments as linalg.conv2d variants +# and will just call linalg.conv2d variants with them, returning its result. +def generate_function( + input_type: InputElemTypeId, + input_layout: InputLayout, + kernel_type: KernelElemTypeId, + kernel_layout: KernelLayout, + acc_type: AccElemTypeId, + conv2d_attr: ConvAttrs, + shape: TestShape, + dynamicity: Dynamicity, +): + shapes = generate_shapes(shape, dynamicity) + func_name = generate_function_name( + input_type, + kernel_type, + acc_type, + shapes, + shape.accumulate, + ) + + input_shape, kernel_shape, output_shape = get_tensor_shape( + shape, kernel_layout, input_layout, conv2d_attr + ) + input_tensor_type = f"tensor<{input_shape[0]}x{input_shape[1]}x{input_shape[2]}x{input_shape[3]}x{input_type.value}>" + kernel_tensor_type = f"tensor<{kernel_shape[0]}x{kernel_shape[1]}x{kernel_shape[2]}x{kernel_shape[3]}x{kernel_type.value}>" + + acc_tensor_type = f"tensor<{output_shape[0]}x{output_shape[1]}x{output_shape[2]}x{output_shape[3]}x{acc_type.value}>" + + op_name = None + if input_layout == InputLayout.NCHW: + if kernel_layout == KernelLayout.FCHW: + op_name = "linalg.conv_2d_nchw_fchw" + if kernel_layout == KernelLayout.HWCF: + op_name = "linalg.conv_2d_nchw_hwcf" + elif input_layout == InputLayout.NHWC: + if kernel_layout == KernelLayout.HWCF: + op_name = "linalg.conv_2d_nhwc_hwcf" + + if op_name is None: + raise ValueError("Invalid combination of input_layout and kernel_layout") + + conv_attr = f"{{dilations = dense<{list(conv2d_attr.DILATION)}> : tensor<2xi64>, strides = dense<{list(conv2d_attr.STRIDE)}> : tensor<2xi64>}}" + + # Compilation info is optional; prints empty string by default. + func_definition = "" + + signature = f"({input_tensor_type}, {kernel_tensor_type}, {acc_tensor_type}) -> {acc_tensor_type}" + import_declaration = f"func.func private @module.{func_name}(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view" + func_definition = func_definition + ( + f"func.func @{func_name}(%lhs: {input_tensor_type}, %rhs: {kernel_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" + f" %result = {op_name} {conv_attr} ins(%lhs, %rhs: {input_tensor_type}, {kernel_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" + f" return %result: {acc_tensor_type}\n" + f"}}\n" + ) + + return MLIRFunction( + name=func_name, + signature=signature, + import_declaration=import_declaration, + definition=func_definition, + ) + + +# Represents a call to a generated test function. +@dataclasses.dataclass +class TestCall: + function: MLIRFunction + op: str + + +# Enumerates ways to initialize tensor buffer contents. +@enum.unique +class TensorGenerator(enum.Enum): + ZERO = "zero" # Fill with zeros + RANDOM = "random" # Fill with (deterministic) pseudorandom values. + + +# Intentionally fixed seed! We want full reproducibility here, both across runs +# and across machines. +# Intentionally not shared with local_pseudorandom_state to limit the ways +# in which shuffling testcases changes which random values are generated. +pseudorandom_generator_seed = 1 + + +# Generate a 4d tensor function argument of the given size as `%name`. 
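+# (For example, name="input" with tensor_shape=[2, 4, 128, 128] and f16 emits:
+#    %input_dim0 = arith.constant 2 : i64
+#    ...
+#    %input_element_type = hal.element_type<f16> : i32
+#    %input_seed = arith.constant 2 : i32
+#    %input = call @conv2d_test.generate_random_tensor(...)
+#  as seen at the top of conv2d_f16_f16_f16_large_calls.mlir below.)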
+def generate_random_4d_tensor( + name: str, + tensor_shape: list, + element_type: typing.Union[InputElemTypeId, KernelElemTypeId], +): + global pseudorandom_generator_seed + pseudorandom_generator_seed = pseudorandom_generator_seed + 1 + return ( + f" %{name}_dim0 = arith.constant {tensor_shape[0]} : i64\n" + f" %{name}_dim1 = arith.constant {tensor_shape[1]} : i64\n" + f" %{name}_dim2 = arith.constant {tensor_shape[2]} : i64\n" + f" %{name}_dim3 = arith.constant {tensor_shape[3]} : i64\n" + f" %{name}_element_type = hal.element_type<{element_type.value}> : i32\n" + f" %{name}_seed = arith.constant {pseudorandom_generator_seed} : i32\n" + f" %{name} = call @conv2d_test.generate_random_tensor(%device, %{name}_dim0, %{name}_dim1, %{name}_dim2, %{name}_dim3, %{name}_element_type, %{name}_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view\n" + ) + + +call_id = 0 + + +def generate_call( + function: MLIRFunction, + input_type: InputElemTypeId, + input_layout: InputLayout, + kernel_type: KernelElemTypeId, + kernel_layout: KernelLayout, + conv2d_attr: ConvAttrs, + acc_type: AccElemTypeId, + shape: TestShape, +): + global call_id + func_name = f"{function.name}_{shape.n}_{shape.c}_{shape.h}_{shape.w}_{shape.f}_{shape.kh}_{shape.kw}" + if shape.accumulate: + func_name = f"{func_name}_acc" + func_name = f"{func_name}_{call_id}" + call_id = call_id + 1 + + # Layout code of the output tensor, used when checking correctness. + layout = -1 + + if input_layout == InputLayout.NCHW: + if kernel_layout == KernelLayout.FCHW or kernel_layout == KernelLayout.HWCF: + layout = 0 # for output tensor NxFxOHxOW + else: + raise ValueError(kernel_layout) + elif input_layout == InputLayout.NHWC: + if kernel_layout == KernelLayout.HWCF: + layout = 1 # for output tensor NxOHxOWxF + else: + raise ValueError(kernel_layout) + else: + raise ValueError(input_layout) + + description = f"Conv2d shape (NxCxHxWxFxKHxKW): {shape.n}x{shape.c}x{shape.h}x{shape.w}x{shape.f}x{shape.kh}x{shape.kw}" + op = ( + f"func.func @{func_name}() attributes {{\n" + f' iree.reflection = {{description = "{description}"}}\n' + "} {\n" + " %device_index = arith.constant 0 : index\n" + " %device = hal.devices.get %device_index : !hal.device\n" + ) + + inp_shape, kernel_shape, out_shape = get_tensor_shape( + shape, + kernel_layout, + input_layout, + conv2d_attr, + ) + + op = op + generate_random_4d_tensor("input", inp_shape, input_type) + op = op + generate_random_4d_tensor("kernel", kernel_shape, kernel_type) + if shape.accumulate: + op = op + generate_random_4d_tensor("acc", out_shape, acc_type) + # TODO(#16168): there's a bug with in-place input->output aliasing and + # we work around it here by passing in a unique copy.
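+ # (How the workaround works: generate_random_4d_tensor bumps the
+ # module-level seed on every call, so decrementing it once here makes the
+ # %acc_copy tensor below reuse the seed of %acc above. The two buffers then
+ # hold identical pseudorandom contents: the callee may clobber %acc_copy in
+ # place while the checker still reads the untouched %acc.)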
+ global pseudorandom_generator_seed + pseudorandom_generator_seed = pseudorandom_generator_seed - 1 + op = op + generate_random_4d_tensor("acc_copy", out_shape, acc_type) + op = op + ( + f" %result = call @module.{function.name}(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" + ) + else: + op = op + ( + f" %acc = util.null : !hal.buffer_view\n" + f" %result = call @module.{function.name}(%input, %kernel) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" + ) + + op = op + ( + f" %n = arith.constant {shape.n} : i64\n" + f" %c = arith.constant {shape.c} : i64\n" + f" %h = arith.constant {shape.h} : i64\n" + f" %w = arith.constant {shape.w} : i64\n" + f" %f = arith.constant {shape.f} : i64\n" + f" %kh = arith.constant {shape.kh} : i64\n" + f" %kw = arith.constant {shape.kw} : i64\n" + f" %layout = arith.constant {layout} : i64\n" + f" %sh = arith.constant {conv2d_attr.STRIDE[0]} : i64\n" + f" %sw = arith.constant {conv2d_attr.STRIDE[1]} : i64\n" + f" %dh = arith.constant {conv2d_attr.DILATION[0]} : i64\n" + f" %dw = arith.constant {conv2d_attr.DILATION[1]} : i64\n" + f" call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n") + + op = op + " return\n" + op = op + "}\n" + + return TestCall(function=function, op=op) + + +# Generates all output files' contents as strings. +def generate( + input_elem_type: InputElemTypeId, + input_layout: InputLayout, + kernel_elem_type: KernelElemTypeId, + kernel_layout: KernelLayout, + conv2d_attr: ConvAttrs, + acc_type: AccElemTypeId, + shapes_id: ShapesId, +): + functions = {} + calls = [] + + for shape in get_test_shapes(shapes_id): + for dynamicity in [Dynamicity.STATIC]: + function = generate_function( + input_elem_type, + input_layout, + kernel_elem_type, + kernel_layout, + acc_type, + conv2d_attr, + shape, + dynamicity, + ) + # Different testcases may differ only by runtime parameters but + # share the same code. For example, dynamic-shapes testcases + # share the same code involving tensor<?x?xf32> even though the runtime + # values in the trace are different. That's why we append conditionally + # to calls, but unconditionally to function_definitions. + if function.name not in functions: + functions[function.name] = function + calls.append( + generate_call( + function, + input_elem_type, + input_layout, + kernel_elem_type, + kernel_layout, + conv2d_attr, + acc_type, + shape, + ) + ) + + return (functions, calls) + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Generator of e2e conv2d tests") + parser.add_argument( + "--output_conv2d_mlir", + type=str, + help="Path of output .mlir file containing the generated conv2d functions", + required=True, + ) + parser.add_argument( + "--output_calls_mlir", + type=str, + help="Path of output .mlir file containing the calls", + required=True, + ) + parser.add_argument( + "--input_type", + type=str, + choices=["i8", "f32", "f16"], + help="Numeric type of input tensors", + required=True, + ) + parser.add_argument( + "--input_layout", + type=str, + default="nchw", + choices=["nchw", "nhwc"], + help="Layout of the input tensor.
nchw is the default.", + required=False, + ) + parser.add_argument( + "--kernel_type", + type=str, + choices=["i8", "f32", "f16"], + help="Numeric type of kernel tensors", + required=True, + ) + parser.add_argument( + "--kernel_layout", + type=str, + default="fchw", + choices=["fchw", "hwcf"], + help="Layout of the kernel tensor. fchw is the default.", + required=False, + ) + parser.add_argument( + "--acc_type", + type=str, + choices=["i32", "f32", "f16"], + help="Numeric type of accumulator and result tensors", + default="", + required=False, + ) + parser.add_argument( + "--shapes", + type=str, + choices=[s.value for s in ShapesId], + help="Collection of tensor shapes to test", + required=True, + ) + parser.add_argument( + "--dilation", + type=str, + default="1,1", + help="The dilation factor for the convolution operation. Comma-separated, e.g. 1,1", + required=False, + ) + parser.add_argument( + "--stride", + type=str, + default="1,1", + help="The stride factor for the convolution operation. Comma-separated, e.g. 1,1", + required=False, + ) + parser.add_argument( + "--requirements", + type=str, + help="Target requirements for this module. Comma-separated. As in -iree-llvmcpu-target-cpu-features. If the target device does not meet all of the requirements, the test will be skipped.", + required=False, + ) + return parser.parse_args() + + +def write_code_file(functions, filename): + with open(filename, "w") as file: + for function in functions.values(): + file.write(function.definition + "\n") + + +def write_calls_file(functions, calls, filename, requirements): + # Module-level reflection information used to control the test tool. + reflection = "" + if requirements: + reflection = ( + "iree.reflection = {" + 'target_features = "' + + ",".join([req.lstrip("+") for req in requirements.split(",")]) + + '"' + "}" + ) + module_definition = ( + f"builtin.module @calls attributes {{\n" f" {reflection}\n" f"}} {{\n\n" + ) + + # Declare the custom module that generates arguments. + module_definition = module_definition + ( + "func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n" + "func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n" + "\n" + + ) + + # Declare the functions that will be called. + for function in functions.values(): + module_definition = module_definition + function.import_declaration + "\n" + module_definition = module_definition + "\n" + + # Emit the test cases for each call.
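+    # (For reference: passing --requirements=+avx512f,+fma produces a module
+    # header like
+    #   builtin.module @calls attributes {
+    #     iree.reflection = {target_features = "avx512f,fma"}
+    #   } { ... }
+    # and the test tool skips the whole module when the device lacks any of
+    # the listed features. With no requirements the attribute is left empty,
+    # as in the generated *_calls.mlir files below.)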
+ for call in calls: + module_definition = module_definition + call.op + "\n" + + module_definition = module_definition + "\n}\n" + + with open(filename, "w") as file: + file.write(module_definition) + + +def main(args): + input_type = InputElemTypeId(args.input_type) + input_layout = InputLayout(args.input_layout) + kernel_type = KernelElemTypeId(args.kernel_type) + kernel_layout = KernelLayout(args.kernel_layout) + acc_type = AccElemTypeId(args.acc_type) + shapes_id = ShapesId(args.shapes) + conv2d_attr = ConvAttrs( + tuple(map(int, args.stride.split(","))), + tuple(map(int, args.dilation.split(","))), + ) + + (functions, calls) = generate( + input_type, + input_layout, + kernel_type, + kernel_layout, + conv2d_attr, + acc_type, + shapes_id, + ) + + write_code_file(functions, args.output_conv2d_mlir) + write_calls_file( + functions, + calls, + args.output_calls_mlir, + args.requirements, + ) + + +if __name__ == "__main__": + main(parse_arguments()) \ No newline at end of file diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh new file mode 100755 index 0000000..35c4a7a --- /dev/null +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This script runs generate_e2e_conv2d_tests for all argument combinations that +# we are interested in testing. +# +# The output is a 'generated' folder with contents like this: +# linalg_ops/ +# convolution/ +# generated/ +# f16_f16_f16/ +# conv2d_f16_f16_f16_large_calls.mlir +# conv2d_f16_f16_f16_large.mlir +# conv2d_f16_f16_f16_medium_calls.mlir +# conv2d_f16_f16_f16_medium.mlir +# conv2d_f16_f16_f16_small_calls.mlir +# conv2d_f16_f16_f16_small.mlir +# conv2d_winograd_f16_f16_f16_large_calls.mlir +# conv2d_winograd_f16_f16_f16_large.mlir +# conv2d_winograd_f16_f16_f16_medium_calls.mlir +# conv2d_winograd_f16_f16_f16_medium.mlir +# conv2d_winograd_f16_f16_f16_small_calls.mlir +# conv2d_winograd_f16_f16_f16_small.mlir +# f32_f32_f32/ +# conv2d_f32_f32_f32_large_calls.mlir +# conv2d_f32_f32_f32_large.mlir +# conv2d_f32_f32_f32_medium_calls.mlir +# conv2d_f32_f32_f32_medium.mlir +# conv2d_f32_f32_f32_small_calls.mlir +# conv2d_f32_f32_f32_small.mlir +# conv2d_winograd_f32_f32_f32_large_calls.mlir +# conv2d_winograd_f32_f32_f32_large.mlir +# conv2d_winograd_f32_f32_f32_medium_calls.mlir +# conv2d_winograd_f32_f32_f32_medium.mlir +# conv2d_winograd_f32_f32_f32_small_calls.mlir +# conv2d_winograd_f32_f32_f32_small.mlir +# ... +# ... +# Usage: +# generate_test_mlir_files.sh + +set -euo pipefail + +this_dir="$(cd $(dirname $0) && pwd)" +generated_dir_root="${this_dir}/generated" + +# Reset generated directory. 
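+# (One concrete invocation performed by the loops below, for f16/small:
+#   python generate_e2e_conv2d_tests.py \
+#     --output_conv2d_mlir=generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir \
+#     --output_calls_mlir=generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir \
+#     --input_type=f16 --kernel_type=f16 --acc_type=f16 --shapes=small)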
+rm -rf ${generated_dir_root?} +mkdir -p ${generated_dir_root?} + +shapes=( + "small" + "medium" + "large" +) + +# input_type;kernel_type;acc_type +type_combinations=( + "f16;f16;f16" + "f32;f32;f32" +) + +for type_combination in ${type_combinations[@]}; do + IFS=";" read -r -a types <<< "${type_combination}" + input_type="${types[0]}" + kernel_type="${types[1]}" + acc_type="${types[2]}" + + type_name="${input_type}_${kernel_type}_${acc_type}" + type_combination_dir="${generated_dir_root}/${type_name}" + mkdir -p ${type_combination_dir} + + for shape in ${shapes[@]}; do + echo "Generating conv2d test files for ${type_name}_${shape}" + + name="conv2d_${type_name}_${shape}" + python ${this_dir}/generate_e2e_conv2d_tests.py \ + --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --input_type=${input_type} \ + --kernel_type=${kernel_type} \ + --acc_type=${acc_type} \ + --shapes=${shape} + + name="conv2d_winograd_${type_name}_${shape}" + python ${this_dir}/generate_e2e_conv2d_tests.py \ + --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --input_type=${input_type} \ + --kernel_type=${kernel_type} \ + --acc_type=${acc_type} \ + --shapes=${shape} + done +done \ No newline at end of file diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir new file mode 100644 index 0000000..51c8a1e --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir @@ -0,0 +1,10 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> + return %result: tensor<2x8x126x126xf16> +} + +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> + return %result: tensor<2x12x126x126xf16> +} + diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir new file mode 100644 index 0000000..7dfb92f --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir @@ -0,0 +1,112 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: 
!hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func 
@conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir new file mode 100644 index 0000000..a2564aa --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir @@ -0,0 +1,15 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { + %result = 
linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> + return %result: tensor<2x2x30x30xf16> +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> + return %result: tensor<2x64x30x30xf16> +} + +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> + return %result: tensor<2x64x30x30xf16> +} + diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir new file mode 100644 index 0000000..c6e86d7 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 
: i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call 
@conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir new file mode 100644 index 0000000..ddbe425 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir @@ -0,0 +1,15 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> + return %result: tensor<1x1x1x1xf16> +} + +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> + return %result: tensor<1x1x15x15xf16> +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> + return %result: tensor<2x2x30x30xf16> +} + diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir new file mode 100644 index 0000000..872c618 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, 
%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 1 : i64
+  %input_dim1 = arith.constant 1 : i64
+  %input_dim2 = arith.constant 1 : i64
+  %input_dim3 = arith.constant 1 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 2 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 1 : i64
+  %kernel_dim1 = arith.constant 1 : i64
+  %kernel_dim2 = arith.constant 1 : i64
+  %kernel_dim3 = arith.constant 1 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 3 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 1 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_dim2 = arith.constant 1 : i64
+  %acc_dim3 = arith.constant 1 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 1 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_dim2 = arith.constant 1 : i64
+  %acc_copy_dim3 = arith.constant 1 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 1 : i64
+  %c = arith.constant 1 : i64
+  %h = arith.constant 1 : i64
+  %w = arith.constant 1 : i64
+  %f = arith.constant 1 : i64
+  %kh = arith.constant 1 : i64
+  %kw = arith.constant 1 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 1 : i64
+  %input_dim1 = arith.constant 1 : i64
+  %input_dim2 = arith.constant 16 : i64
+  %input_dim3 = arith.constant 16 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 5 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 1 : i64
+  %kernel_dim1 = arith.constant 1 : i64
+  %kernel_dim2 = arith.constant 2 : i64
+  %kernel_dim3 = arith.constant 2 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 6 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 1 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_dim2 = arith.constant 15 : i64
+  %acc_dim3 = arith.constant 15 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 7 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 1 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_dim2 = arith.constant 15 : i64
+  %acc_copy_dim3 = arith.constant 15 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 7 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 1 : i64
+  %c = arith.constant 1 : i64
+  %h = arith.constant 16 : i64
+  %w = arith.constant 16 : i64
+  %f = arith.constant 1 : i64
+  %kh = arith.constant 2 : i64
+  %kw = arith.constant 2 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 2 : i64
+  %input_dim2 = arith.constant 32 : i64
+  %input_dim3 = arith.constant 32 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 8 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 2 : i64
+  %kernel_dim1 = arith.constant 2 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 9 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 2 : i64
+  %acc_dim2 = arith.constant 30 : i64
+  %acc_dim3 = arith.constant 30 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 10 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 2 : i64
+  %acc_copy_dim2 = arith.constant 30 : i64
+  %acc_copy_dim3 = arith.constant 30 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 10 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 2 : i64
+  %h = arith.constant 32 : i64
+  %w = arith.constant 32 : i64
+  %f = arith.constant 2 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
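A note on the accumulator shapes in the calls above: every test case uses stride 1 and dilation 1, so the output (and hence %acc) spatial size is simply the input size minus the kernel size plus one, e.g. a 16x16 input with a 2x2 kernel yields 15x15, and a 32x32 input with a 3x3 kernel yields 30x30. A minimal Python sketch of that arithmetic (illustrative only, not part of the patch; the generated shapes are presumably computed this way by generate_e2e_conv2d_tests.py):

    # Output spatial size for an unpadded ("VALID") conv2d; matches the
    # N x F x (H - KH + 1) x (W - KW + 1) accumulator shapes in these tests.
    def conv2d_out_size(in_size: int, kernel_size: int,
                        stride: int = 1, dilation: int = 1) -> int:
        effective_kernel = dilation * (kernel_size - 1) + 1
        return (in_size - effective_kernel) // stride + 1

    assert conv2d_out_size(16, 2) == 15  # 1x1x16x16 input, 2x2 kernel
    assert conv2d_out_size(32, 3) == 30  # 2x2x32x32 input, 3x3 kernel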
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir
new file mode 100644
index 0000000..51c8a1e
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir
@@ -0,0 +1,10 @@
+func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16>
+  return %result: tensor<2x8x126x126xf16>
+}
+
+func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16>
+  return %result: tensor<2x12x126x126xf16>
+}
+
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir
new file mode 100644
index 0000000..7dfb92f
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir
@@ -0,0 +1,112 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 4 : i64
+  %input_dim2 = arith.constant 128 : i64
+  %input_dim3 = arith.constant 128 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 2 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 8 : i64
+  %kernel_dim1 = arith.constant 4 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 3 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 8 : i64
+  %acc_dim2 = arith.constant 126 : i64
+  %acc_dim3 = arith.constant 126 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 8 : i64
+  %acc_copy_dim2 = arith.constant 126 : i64
+  %acc_copy_dim3 = arith.constant 126 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 4 : i64
+  %h = arith.constant 128 : i64
+  %w = arith.constant 128 : i64
+  %f = arith.constant 8 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 3 : i64
+  %input_dim2 = arith.constant 128 : i64
+  %input_dim3 = arith.constant 128 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 5 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 12 : i64
+  %kernel_dim1 = arith.constant 3 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 6 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 12 : i64
+  %acc_dim2 = arith.constant 126 : i64
+  %acc_dim3 = arith.constant 126 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 7 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 12 : i64
+  %acc_copy_dim2 = arith.constant 126 : i64
+  %acc_copy_dim3 = arith.constant 126 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 7 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 3 : i64
+  %h = arith.constant 128 : i64
+  %w = arith.constant 128 : i64
+  %f = arith.constant 12 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir
new file mode 100644
index 0000000..a2564aa
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir
@@ -0,0 +1,15 @@
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16>
+  return %result: tensor<2x2x30x30xf16>
+}
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16>
+  return %result: tensor<2x64x30x30xf16>
+}
+
+func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16>
+  return %result: tensor<2x64x30x30xf16>
+}
+
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir
new file mode 100644
index 0000000..c6e86d7
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir
@@ -0,0 +1,163 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 2 : i64
+  %input_dim2 = arith.constant 32 : i64
+  %input_dim3 = arith.constant 32 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 2 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 2 : i64
+  %kernel_dim1 = arith.constant 2 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 3 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 2 : i64
+  %acc_dim2 = arith.constant 30 : i64
+  %acc_dim3 = arith.constant 30 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 2 : i64
+  %acc_copy_dim2 = arith.constant 30 : i64
+  %acc_copy_dim3 = arith.constant 30 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 2 : i64
+  %h = arith.constant 32 : i64
+  %w = arith.constant 32 : i64
+  %f = arith.constant 2 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 2 : i64
+  %input_dim2 = arith.constant 32 : i64
+  %input_dim3 = arith.constant 32 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 5 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 64 : i64
+  %kernel_dim1 = arith.constant 2 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 6 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 64 : i64
+  %acc_dim2 = arith.constant 30 : i64
+  %acc_dim3 = arith.constant 30 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 7 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 64 : i64
+  %acc_copy_dim2 = arith.constant 30 : i64
+  %acc_copy_dim3 = arith.constant 30 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 7 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 2 : i64
+  %h = arith.constant 32 : i64
+  %w = arith.constant 32 : i64
+  %f = arith.constant 64 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_32_64_3_3_acc_2() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 32 : i64
+  %input_dim2 = arith.constant 32 : i64
+  %input_dim3 = arith.constant 32 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 8 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 64 : i64
+  %kernel_dim1 = arith.constant 32 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 9 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 64 : i64
+  %acc_dim2 = arith.constant 30 : i64
+  %acc_dim3 = arith.constant 30 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 10 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 64 : i64
+  %acc_copy_dim2 = arith.constant 30 : i64
+  %acc_copy_dim3 = arith.constant 30 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 10 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 32 : i64
+  %h = arith.constant 32 : i64
+  %w = arith.constant 32 : i64
+  %f = arith.constant 64 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
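Each call function above materializes the accumulator twice from the same seed (%acc and %acc_copy): the compiled conv2d consumes %acc_copy, while the untouched %acc is passed to check_conv2d_results along with the inputs and the actual result. The check in iree-e2e-conv2d-test.cc recomputes the expected value on the host; a rough NumPy sketch of the accumulate semantics being verified (illustrative only, not the actual C++ implementation):

    import numpy as np

    def reference_conv2d_accumulate(x, w, acc):
        # x: [N, C, H, W], w: [F, C, KH, KW], acc: [N, F, OH, OW];
        # stride and dilation are 1, as in all of the tests above.
        N, C, H, W = x.shape
        F, _, KH, KW = w.shape
        OH, OW = H - KH + 1, W - KW + 1
        out = acc.astype(np.float64).copy()  # widen for a stable reference
        for oh in range(OH):
            for ow in range(OW):
                window = x[:, :, oh:oh + KH, ow:ow + KW]  # [N, C, KH, KW]
                out[:, :, oh, ow] += np.einsum("nchw,fchw->nf", window, w)
        return out

A test passes when the device result matches this kind of reference, up to whatever tolerance the harness applies for the element type.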
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir
new file mode 100644
index 0000000..ddbe425
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir
@@ -0,0 +1,15 @@
+func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16>
+  return %result: tensor<1x1x1x1xf16>
+}
+
+func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16>
+  return %result: tensor<1x1x15x15xf16>
+}
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16>
+  return %result: tensor<2x2x30x30xf16>
+}
+
diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir
new file mode 100644
index 0000000..872c618
--- /dev/null
+++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir
@@ -0,0 +1,163 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 1 : i64
+  %input_dim1 = arith.constant 1 : i64
+  %input_dim2 = arith.constant 1 : i64
+  %input_dim3 = arith.constant 1 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 2 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 1 : i64
+  %kernel_dim1 = arith.constant 1 : i64
+  %kernel_dim2 = arith.constant 1 : i64
+  %kernel_dim3 = arith.constant 1 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 3 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 1 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_dim2 = arith.constant 1 : i64
+  %acc_dim3 = arith.constant 1 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 1 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_dim2 = arith.constant 1 : i64
+  %acc_copy_dim3 = arith.constant 1 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 1 : i64
+  %c = arith.constant 1 : i64
+  %h = arith.constant 1 : i64
+  %w = arith.constant 1 : i64
+  %f = arith.constant 1 : i64
+  %kh = arith.constant 1 : i64
+  %kw = arith.constant 1 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 1 : i64
+  %input_dim1 = arith.constant 1 : i64
+  %input_dim2 = arith.constant 16 : i64
+  %input_dim3 = arith.constant 16 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 5 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 1 : i64
+  %kernel_dim1 = arith.constant 1 : i64
+  %kernel_dim2 = arith.constant 2 : i64
+  %kernel_dim3 = arith.constant 2 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 6 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 1 : i64
+  %acc_dim1 = arith.constant 1 : i64
+  %acc_dim2 = arith.constant 15 : i64
+  %acc_dim3 = arith.constant 15 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 7 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 1 : i64
+  %acc_copy_dim1 = arith.constant 1 : i64
+  %acc_copy_dim2 = arith.constant 15 : i64
+  %acc_copy_dim3 = arith.constant 15 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 7 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 1 : i64
+  %c = arith.constant 1 : i64
+  %h = arith.constant 16 : i64
+  %w = arith.constant 16 : i64
+  %f = arith.constant 1 : i64
+  %kh = arith.constant 2 : i64
+  %kw = arith.constant 2 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 2 : i64
+  %input_dim2 = arith.constant 32 : i64
+  %input_dim3 = arith.constant 32 : i64
+  %input_element_type = hal.element_type<f16> : i32
+  %input_seed = arith.constant 8 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 2 : i64
+  %kernel_dim1 = arith.constant 2 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f16> : i32
+  %kernel_seed = arith.constant 9 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 2 : i64
+  %acc_dim2 = arith.constant 30 : i64
+  %acc_dim3 = arith.constant 30 : i64
+  %acc_element_type = hal.element_type<f16> : i32
+  %acc_seed = arith.constant 10 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 2 : i64
+  %acc_copy_dim2 = arith.constant 30 : i64
+  %acc_copy_dim3 = arith.constant 30 : i64
+  %acc_copy_element_type = hal.element_type<f16> : i32
+  %acc_copy_seed = arith.constant 10 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 2 : i64
+  %h = arith.constant 32 : i64
+  %w = arith.constant 32 : i64
+  %f = arith.constant 2 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir
new file mode 100644
index 0000000..a47185c
--- /dev/null
+++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir
@@ -0,0 +1,10 @@
+func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x4x128x128xf32>, %rhs: tensor<8x4x3x3xf32>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf32>, tensor<8x4x3x3xf32>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32>
+  return %result: tensor<2x8x126x126xf32>
+}
+
+func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x3x128x128xf32>, %rhs: tensor<12x3x3x3xf32>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> {
+  %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf32>, tensor<12x3x3x3xf32>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32>
+  return %result: tensor<2x12x126x126xf32>
+}
+
diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir
new file mode 100644
index 0000000..cdd2788
--- /dev/null
+++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir
@@ -0,0 +1,112 @@
+builtin.module @calls attributes {
+
+} {
+
+func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
+
+func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
+
+func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 4 : i64
+  %input_dim2 = arith.constant 128 : i64
+  %input_dim3 = arith.constant 128 : i64
+  %input_element_type = hal.element_type<f32> : i32
+  %input_seed = arith.constant 2 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 8 : i64
+  %kernel_dim1 = arith.constant 4 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f32> : i32
+  %kernel_seed = arith.constant 3 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 8 : i64
+  %acc_dim2 = arith.constant 126 : i64
+  %acc_dim3 = arith.constant 126 : i64
+  %acc_element_type = hal.element_type<f32> : i32
+  %acc_seed = arith.constant 4 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 8 : i64
+  %acc_copy_dim2 = arith.constant 126 : i64
+  %acc_copy_dim3 = arith.constant 126 : i64
+  %acc_copy_element_type = hal.element_type<f32> : i32
+  %acc_copy_seed = arith.constant 4 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 4 : i64
+  %h = arith.constant 128 : i64
+  %w = arith.constant 128 : i64
+  %f = arith.constant 8 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes {
+  iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"}
+} {
+  %device_index = arith.constant 0 : index
+  %device = hal.devices.get %device_index : !hal.device
+  %input_dim0 = arith.constant 2 : i64
+  %input_dim1 = arith.constant 3 : i64
+  %input_dim2 = arith.constant 128 : i64
+  %input_dim3 = arith.constant 128 : i64
+  %input_element_type = hal.element_type<f32> : i32
+  %input_seed = arith.constant 5 : i32
+  %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %kernel_dim0 = arith.constant 12 : i64
+  %kernel_dim1 = arith.constant 3 : i64
+  %kernel_dim2 = arith.constant 3 : i64
+  %kernel_dim3 = arith.constant 3 : i64
+  %kernel_element_type = hal.element_type<f32> : i32
+  %kernel_seed = arith.constant 6 : i32
+  %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_dim0 = arith.constant 2 : i64
+  %acc_dim1 = arith.constant 12 : i64
+  %acc_dim2 = arith.constant 126 : i64
+  %acc_dim3 = arith.constant 126 : i64
+  %acc_element_type = hal.element_type<f32> : i32
+  %acc_seed = arith.constant 7 : i32
+  %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %acc_copy_dim0 = arith.constant 2 : i64
+  %acc_copy_dim1 = arith.constant 12 : i64
+  %acc_copy_dim2 = arith.constant 126 : i64
+  %acc_copy_dim3 = arith.constant 126 : i64
+  %acc_copy_element_type = hal.element_type<f32> : i32
+  %acc_copy_seed = arith.constant 7 : i32
+  %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
+  %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
+  %n = arith.constant 2 : i64
+  %c = arith.constant 3 : i64
+  %h = arith.constant 128 : i64
+  %w = arith.constant 128 : i64
+  %f = arith.constant 12 : i64
+  %kh = arith.constant 3 : i64
+  %kw = arith.constant 3 : i64
+  %layout = arith.constant 0 : i64
+  %sh = arith.constant 1 : i64
+  %sw = arith.constant 1 : i64
+  %dh = arith.constant 1 : i64
+  %dw = arith.constant 1 : i64
+  call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
+  return
+}
+
+
+}
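The random tensors are fully determined by their i32 seeds, and the seeds in each *_calls.mlir module simply count upward: the first test case uses 2/3/4 for input/kernel/accumulator, the second 5/6/7, the third 8/9/10, with %acc_copy always reusing the accumulator seed. A small sketch of that pattern (illustrative only, inferred from the generated modules above):

    # Seed schedule visible in the generated *_calls.mlir modules: three
    # consecutive seeds per test case, starting at 2; acc_copy repeats acc.
    def seeds_for_case(case_index: int, first_seed: int = 2) -> dict:
        base = first_seed + 3 * case_index
        return {"input": base, "kernel": base + 1,
                "acc": base + 2, "acc_copy": base + 2}

    assert seeds_for_case(0) == {"input": 2, "kernel": 3, "acc": 4, "acc_copy": 4}
    assert seeds_for_case(2) == {"input": 8, "kernel": 9, "acc": 10, "acc_copy": 10}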
tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} + diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir new file mode 100644 index 0000000..3537bc9 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, 
%acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = 
arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git 
a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir new file mode 100644 index 0000000..9ecd2bd --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir @@ -0,0 +1,15 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} + +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf32>, tensor<1x1x2x2xf32>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> + return %result: tensor<1x1x15x15xf32> +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} + diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir new file mode 100644 index 0000000..092bd67 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + 
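+  // Note: generate_random_tensor is deterministic in its i32 seed, so equal
+  // seeds yield identical tensors. %acc and %acc_copy below share one seed
+  // for exactly that reason: the conv call consumes %acc_copy while %acc
+  // stays pristine for the reference check at the end of the test.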
%input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : 
i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, 
%acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir new file mode 100644 index 0000000..a47185c --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir @@ -0,0 +1,10 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x4x128x128xf32>, %rhs: tensor<8x4x3x3xf32>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf32>, tensor<8x4x3x3xf32>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> + return %result: tensor<2x8x126x126xf32> +} + +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x3x128x128xf32>, %rhs: tensor<12x3x3x3xf32>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf32>, tensor<12x3x3x3xf32>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> + return %result: tensor<2x12x126x126xf32> +} + diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir new file mode 100644 index 0000000..cdd2788 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir @@ -0,0 +1,112 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, 
%layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () 
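+  // The check above recomputes the convolution on the host with the scalar
+  // reference implementation, using %input, %kernel, and the pristine %acc,
+  // and compares it element-wise against the device-computed %result.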
+ return +} + +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir new file mode 100644 index 0000000..e0a0376 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir @@ -0,0 +1,15 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: 
tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<64x2x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<64x2x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} + +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x32xf32>, %rhs: tensor<64x32x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf32>, tensor<64x32x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} + diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir new file mode 100644 index 0000000..3537bc9 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = 
arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + 
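+  // The accumulator dims match the conv output shape. With stride and
+  // dilation 1: OH = (H - (KH - 1) * DH - 1) / SH + 1
+  //               = (32 - 2 - 1) / 1 + 1 = 30.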
%acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = 
hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir new file mode 100644 index 0000000..9ecd2bd --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir @@ -0,0 +1,15 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} + +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf32>, tensor<1x1x2x2xf32>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> + return %result: tensor<1x1x15x15xf32> +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} + diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir new file mode 100644 index 0000000..092bd67 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir @@ -0,0 +1,163 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private 
@conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) + +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call 
@conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 
: index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} + + +} diff --git a/linalg_ops/iree-e2e-conv2d-test.cc b/linalg_ops/iree-e2e-conv2d-test.cc new file mode 100644 index 0000000..686fb4e --- /dev/null +++ b/linalg_ops/iree-e2e-conv2d-test.cc @@ -0,0 +1,775 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/base/internal/math.h"
+#include "iree/hal/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/tooling/context_util.h"
+#include "iree/tooling/device_util.h"
+#include "iree/vm/api.h"
+#include "iree/vm/native_module_cc.h"
+#include "tools/testing/e2e/test_utils.h"
+
+//===----------------------------------------------------------------------===//
+// Reference conv2d (NCHW-FCHW) and (NHWC-HWCF)
+//===----------------------------------------------------------------------===//
+
+// Converts 4D indices in row-major order to a 1D index: the strides for
+// (n, c, h, w) are (channels * height * width, height * width, width, 1).
+static int convert_to_1d_index(iree_hal_dim_t channels, iree_hal_dim_t height,
+                               iree_hal_dim_t width, iree_hal_dim_t n,
+                               iree_hal_dim_t c, iree_hal_dim_t h,
+                               iree_hal_dim_t w) {
+  return n * (channels * height * width) + c * (height * width) + h * width +
+         w;
+}
+
+// [f16 <= f16 * f16 + f16]
+static void reference_conv2d_f16_f16_f16_f16(
+    iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size,
+    iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size,
+    iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size,
+    iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size,
+    iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t* input_data,
+    const uint16_t* kernel_data, const uint16_t* acc_data,
+    uint16_t* result_data, iree_hal_dim_t n, iree_hal_dim_t oc,
+    iree_hal_dim_t oh, iree_hal_dim_t ow) {
+  if (layout == 0) {
+    // The layout of output tensor is NxfxOHxOW
+    iree_hal_dim_t out_idx =
+        convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow);
+
+    float acc = acc_data ? iree_math_f16_to_f32(acc_data[out_idx]) : 0.f;
+
+    for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) {
+      for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) {
+        for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) {
+          iree_hal_dim_t inp_idx = convert_to_1d_index(
+              c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size),
+              (ow * sw_size + kw * dw_size));
+          iree_hal_dim_t krnl_idx =
+              convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw);
+
+          acc += iree_math_f16_to_f32(input_data[inp_idx]) *
+                 iree_math_f16_to_f32(kernel_data[krnl_idx]);
+        }
+      }
+      result_data[out_idx] = iree_math_f32_to_f16(acc);
+    }
+  } else if (layout == 1) {
+    // The layout of output tensor is NxOHxOWxf
+    iree_hal_dim_t out_idx =
+        convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc);
+
+    float acc = acc_data ?
iree_math_f16_to_f32(acc_data[out_idx]) : 0.f; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + } + result_data[out_idx] = iree_math_f32_to_f16(acc); + } +} + +// [f32 <= f16 * f16 + f32] +static void reference_conv2d_f16_f16_f32_f32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t* input_data, + const uint16_t* kernel_data, const float* acc_data, float* result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + float acc = acc_data ? acc_data[out_idx] : 0.f; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + float acc = acc_data ? acc_data[out_idx] : 0.f; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + } + result_data[out_idx] = acc; + } +} + +// [i32 <= i8 * i8 + i32] +static void reference_conv2d_i8_i8_i32_i32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const int8_t* input_data, + const int8_t* kernel_data, const int32_t* acc_data, int32_t* result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + int32_t acc = acc_data ? 
acc_data[out_idx] : 0; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + int8_t lhs_value = input_data[inp_idx]; + int8_t rhs_value = kernel_data[krnl_idx]; + acc += (int32_t)lhs_value * (int32_t)rhs_value; + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + int32_t acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + int8_t lhs_value = input_data[inp_idx]; + int8_t rhs_value = kernel_data[krnl_idx]; + acc += (int32_t)lhs_value * (int32_t)rhs_value; + } + } + } + result_data[out_idx] = acc; + } +} + +static void reference_conv2d_f32_f32_f32_f32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const float* input_data, + const float* kernel_data, const float* acc_data, float* result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + float acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + acc += input_data[inp_idx] * kernel_data[krnl_idx]; + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + float acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + acc += input_data[inp_idx] * kernel_data[krnl_idx]; + } + } + } + result_data[out_idx] = acc; + } +} + +// Helper for reference_conv2d. 
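+// Dispatches on the (input, kernel, acc) element-type triple and computes a
+// single output element at (n, oc, oh, ow) with the matching scalar routine
+// above.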
+static iree_status_t reference_conv2d_element(
+    iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size,
+    iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size,
+    iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size,
+    iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size,
+    iree_hal_dim_t oh_size, iree_hal_dim_t ow_size,
+    iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type,
+    iree_hal_element_type_t acc_type, void* input_data, void* kernel_data,
+    void* acc_data, void* result_data, iree_hal_dim_t n, iree_hal_dim_t oc,
+    iree_hal_dim_t oh, iree_hal_dim_t ow) {
+  if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 &&
+      kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 &&
+      acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
+    reference_conv2d_f32_f32_f32_f32(
+        n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout,
+        sh_size, sw_size, dh_size, dw_size, oh_size, ow_size,
+        (const float*)input_data, (const float*)kernel_data,
+        (const float*)acc_data, (float*)result_data, n, oc, oh, ow);
+  } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 &&
+             kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 &&
+             acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) {
+    reference_conv2d_f16_f16_f16_f16(
+        n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout,
+        sh_size, sw_size, dh_size, dw_size, oh_size, ow_size,
+        (const uint16_t*)input_data, (const uint16_t*)kernel_data,
+        (const uint16_t*)acc_data, (uint16_t*)result_data, n, oc, oh, ow);
+  } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 &&
+             kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 &&
+             acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
+    reference_conv2d_f16_f16_f32_f32(
+        n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout,
+        sh_size, sw_size, dh_size, dw_size, oh_size, ow_size,
+        (const uint16_t*)input_data, (const uint16_t*)kernel_data,
+        (const float*)acc_data, (float*)result_data, n, oc, oh, ow);
+  } else if (input_type == IREE_HAL_ELEMENT_TYPE_INT_8 &&
+             kernel_type == IREE_HAL_ELEMENT_TYPE_INT_8 &&
+             acc_type == IREE_HAL_ELEMENT_TYPE_INT_32) {
+    reference_conv2d_i8_i8_i32_i32(
+        n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout,
+        sh_size, sw_size, dh_size, dw_size, oh_size, ow_size,
+        (const int8_t*)input_data, (const int8_t*)kernel_data,
+        (const int32_t*)acc_data, (int32_t*)result_data, n, oc, oh, ow);
+  } else {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "unhandled combination of element types in conv2d");
+  }
+  return iree_ok_status();
+}
+
+// Calculates the output spatial size from the input size, kernel size,
+// stride, and dilation (no padding):
+//   out = floor((i - (k - 1) * d - 1) / s) + 1
+// e.g. i = 32, k = 3, s = 1, d = 1 gives (32 - 2 - 1) / 1 + 1 = 30.
+static iree_hal_dim_t out_shape_calc(iree_hal_dim_t i_shape,
+                                     iree_hal_dim_t k_shape,
+                                     iree_hal_dim_t stride,
+                                     iree_hal_dim_t dilation) {
+  iree_hal_dim_t x = (k_shape - 1) * (dilation - 1);
+  x = i_shape - k_shape - x;
+  // Integer division floors already, so no explicit floor() is needed.
+  return x / stride + 1;
+}
+
+// Reference conv2d implementation (NCHW-FCHW or NHWC-HWCF, selected by
+// layout), used to compare conv2d results against.
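+// The output-element loops follow each layout's memory order: layout 0 (NCHW)
+// iterates (n, f, oh, ow) and layout 1 (NHWC) iterates (n, oh, ow, f); any
+// other layout value fails with IREE_STATUS_INVALID_ARGUMENT.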
+static iree_status_t reference_conv2d( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type, + iree_hal_element_type_t acc_type, iree_byte_span_t input_contents, + iree_byte_span_t kernel_contents, iree_byte_span_t acc_contents, + iree_byte_span_t result_contents, int compute_every) { + IREE_TRACE_ZONE_BEGIN(z0); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, n_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, c_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, h_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, w_size); + + iree_hal_dim_t oh_size = out_shape_calc(h_size, kh_size, sh_size, dh_size); + iree_hal_dim_t ow_size = out_shape_calc(w_size, kw_size, sw_size, dw_size); + + if (layout == 0) { + for (iree_hal_dim_t n = 0; n < n_size; ++n) { + for (iree_hal_dim_t oc = 0; oc < f_size; ++oc) { + for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { + for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + reference_conv2d_element( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, + layout, sh_size, sw_size, dh_size, dw_size, oh_size, + ow_size, input_type, kernel_type, acc_type, + input_contents.data, kernel_contents.data, + acc_contents.data, result_contents.data, n, oc, oh, ow)); + } + } + } + } + } else if (layout == 1) { + for (iree_hal_dim_t n = 0; n < n_size; ++n) { + for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { + for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { + for (iree_hal_dim_t oc = 0; oc < f_size; ++oc) { + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + reference_conv2d_element( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, + layout, sh_size, sw_size, dh_size, dw_size, oh_size, + ow_size, input_type, kernel_type, acc_type, + input_contents.data, kernel_contents.data, + acc_contents.data, result_contents.data, n, oc, oh, ow)); + } + } + } + } + } else { + return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, + "unhandled conv2d layout"); + } + + IREE_TRACE_ZONE_END(z0); + return iree_ok_status(); +} + +//===----------------------------------------------------------------------===// +// Conv2d comparison/logging +//===----------------------------------------------------------------------===// + +typedef struct { + iree_allocator_t host_allocator; + iree_hal_dim_t n; // batch dim + iree_hal_dim_t c; // input channels + iree_hal_dim_t h; // input height + iree_hal_dim_t w; // input width + iree_hal_dim_t f; // output channels + iree_hal_dim_t kh; // kernel height + iree_hal_dim_t kw; // kernel width + iree_hal_dim_t layout; // conv layout, 0 : nchwxfchw (default); 1: nhwcxhwcf + iree_hal_dim_t sh; // stride along height dim + iree_hal_dim_t sw; // stride along width dim + iree_hal_dim_t dh; // dilation along height dim + iree_hal_dim_t dw; // dilation along width dim + iree_hal_element_type_t input_type; + iree_hal_element_type_t kernel_type; + iree_hal_element_type_t acc_type; + iree_hal_element_type_t result_type; + iree_byte_span_t input_contents; + iree_byte_span_t kernel_contents; + iree_byte_span_t acc_contents; + iree_byte_span_t actual_contents; + iree_byte_span_t expected_contents; +} conv2d_results_t; + +static void conv2d_results_deinitialize(conv2d_results_t* results); + +static iree_status_t conv2d_results_initialize( + 
iree_hal_device_t* device, iree_hal_dim_t n_size, iree_hal_dim_t c_size, + iree_hal_dim_t h_size, iree_hal_dim_t w_size, iree_hal_dim_t f_size, + iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, + iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, + iree_hal_dim_t dw_size, iree_hal_buffer_view_t* input, + iree_hal_buffer_view_t* kernel, iree_hal_buffer_view_t* acc, + iree_hal_buffer_view_t* result, iree_allocator_t host_allocator, + conv2d_results_t* out_results) { + IREE_TRACE_ZONE_BEGIN(z0); + + memset(out_results, 0, sizeof(*out_results)); + out_results->host_allocator = host_allocator; + + out_results->n = n_size; + out_results->c = c_size; + out_results->h = h_size; + out_results->w = w_size; + out_results->f = f_size; + out_results->kh = kh_size; + out_results->kw = kw_size; + out_results->layout = layout; + out_results->sh = sh_size; + out_results->sw = sw_size; + out_results->dh = dh_size; + out_results->dw = dw_size; + + out_results->input_type = iree_hal_buffer_view_element_type(input); + out_results->kernel_type = iree_hal_buffer_view_element_type(kernel); + out_results->acc_type = iree_hal_buffer_view_element_type(acc); + out_results->result_type = iree_hal_buffer_view_element_type(result); + + iree_hal_buffer_t* input_buffer = iree_hal_buffer_view_buffer(input); + iree_hal_buffer_t* kernel_buffer = iree_hal_buffer_view_buffer(kernel); + iree_hal_buffer_t* acc_buffer = acc ? iree_hal_buffer_view_buffer(acc) : NULL; + iree_hal_buffer_t* result_buffer = iree_hal_buffer_view_buffer(result); + + iree_status_t status = iree_ok_status(); + + if (iree_status_is_ok(status)) { + out_results->input_contents.data_length = + iree_hal_buffer_byte_length(input_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->input_contents.data_length, + (void**)&out_results->input_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, input_buffer, 0, out_results->input_contents.data, + out_results->input_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + + if (iree_status_is_ok(status)) { + out_results->kernel_contents.data_length = + iree_hal_buffer_byte_length(kernel_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->kernel_contents.data_length, + (void**)&out_results->kernel_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, kernel_buffer, 0, out_results->kernel_contents.data, + out_results->kernel_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + + if (acc_buffer) { + if (iree_status_is_ok(status)) { + out_results->acc_contents.data_length = + iree_hal_buffer_byte_length(acc_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->acc_contents.data_length, + (void**)&out_results->acc_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, acc_buffer, 0, out_results->acc_contents.data, + out_results->acc_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + } + + if (iree_status_is_ok(status)) { + out_results->actual_contents.data_length = + iree_hal_buffer_byte_length(result_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->actual_contents.data_length, + (void**)&out_results->actual_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, 
result_buffer, 0, out_results->actual_contents.data,
+        out_results->actual_contents.data_length,
+        IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout());
+  }
+
+  if (iree_status_is_ok(status)) {
+    out_results->expected_contents.data_length =
+        iree_hal_buffer_byte_length(result_buffer);
+    status = iree_allocator_malloc(
+        host_allocator, out_results->expected_contents.data_length,
+        (void**)&out_results->expected_contents.data);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    conv2d_results_deinitialize(out_results);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void conv2d_results_deinitialize(conv2d_results_t* results) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(results->host_allocator, results->input_contents.data);
+  iree_allocator_free(results->host_allocator, results->kernel_contents.data);
+  if (!iree_byte_span_is_empty(results->acc_contents)) {
+    iree_allocator_free(results->host_allocator, results->acc_contents.data);
+  }
+  iree_allocator_free(results->host_allocator, results->actual_contents.data);
+  iree_allocator_free(results->host_allocator,
+                      results->expected_contents.data);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Helper for check_conv2d_results: the actual interesting part once we've
+// obtained and validated the {n, f, oh, ow}_size values. On error, the first
+// index at which the actual and expected values disagree is reported. TODO:
+// Add detailed logging to |file|.
+static iree_status_t check_conv2d_results_impl(FILE* file,
+                                               const conv2d_results_t* results,
+                                               int check_every) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      reference_conv2d(
+          results->n, results->c, results->h, results->w, results->f,
+          results->kh, results->kw, results->layout, results->sh, results->sw,
+          results->dh, results->dw, results->input_type, results->kernel_type,
+          results->acc_type, results->input_contents,
+          results->kernel_contents, results->acc_contents,
+          results->expected_contents, check_every));
+
+  int count = 0;
+
+  iree_hal_dim_t oh_size =
+      out_shape_calc(results->h, results->kh, results->sh, results->dh);
+  iree_hal_dim_t ow_size =
+      out_shape_calc(results->w, results->kw, results->sw, results->dw);
+
+  for (iree_hal_dim_t n = 0; n < results->n; ++n) {
+    for (iree_hal_dim_t oc = 0; oc < results->f; ++oc) {
+      for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) {
+        for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) {
+          if (++count < check_every) continue;
+          count = 0;
+          iree_hal_dim_t idx = convert_to_1d_index(results->f, oh_size,
+                                                   ow_size, n, oc, oh, ow);
+          iree_test_utils_e2e_value_t actual_value =
+              iree_test_utils_read_buffer_element(
+                  idx, results->result_type, results->actual_contents.data);
+          iree_test_utils_e2e_value_t expected_value =
+              iree_test_utils_read_buffer_element(
+                  idx, results->result_type, results->expected_contents.data);
+          if (!iree_test_utils_result_elements_agree(actual_value,
+                                                     expected_value)) {
+            fprintf(
+                file,
+                "\n\nerror: the actual and expected result tensors disagree "
+                "at n %" PRIdim ", oc %" PRIdim ", oh %" PRIdim ", ow %" PRIdim
+                ".\n\n",
+                n, oc, oh, ow);
+            IREE_TRACE_ZONE_END(z0);
+            return iree_make_status(IREE_STATUS_ABORTED);
+          }
+        }
+      }
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
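check_conv2d_results_impl above subsamples comparisons through its check_every parameter: the ++count/continue pattern visits only every check_every-th output element. A standalone demo of that skip pattern (values are illustrative; the conv2d test currently passes 1, so every element is checked):

#include <stdio.h>

int main(void) {
  int check_every = 4;  // hypothetical subsampling stride
  int count = 0;
  for (int idx = 0; idx < 16; ++idx) {
    if (++count < check_every) continue;  // skip all but every 4th element
    count = 0;
    printf("comparing element %d\n", idx);  // prints idx = 3, 7, 11, 15
  }
  return 0;
}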
+// Given an actual conv2d's inputs and output (all host-local), uses a
+// reference conv2d implementation on the same inputs to check if the output
+// is correct. On error, reports the first index at which the actual and
+// expected values disagree. TODO: Add detailed logging to |file|.
+static iree_status_t check_conv2d_results(FILE* file,
+                                          const conv2d_results_t* results) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // TODO: Increase the check_every param to reduce the number of comparisons.
+  int check_every = 1;
+  iree_status_t status = check_conv2d_results_impl(file, results, check_every);
+  if (!iree_status_is_ok(status) && check_every > 1) {
+    // If we got a failure with check_every > 1, that didn't log a useful
+    // numerical summary, as most of the reference tensor entries hadn't been
+    // computed. Rerun now with check_every = 1 to get that numerical logging.
+    iree_status_ignore(status);
+    status = check_conv2d_results_impl(file, results, 1);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// `conv2d_test` custom module
+//===----------------------------------------------------------------------===//
+// This uses the C++ wrapper to keep things simple. Though easier to use, it
+// adds overhead/code-size bloat that doesn't matter in a test like this. A C
+// module builder API that removes the boilerplate is TBD, so everything in
+// this file besides this module is written in C so that it can be swapped
+// back to pure C in the future.
+
+namespace iree {
+
+class Conv2dTestModuleState final {
+ public:
+  explicit Conv2dTestModuleState(iree_allocator_t host_allocator)
+      : host_allocator_(host_allocator) {}
+  ~Conv2dTestModuleState() = default;
+
+  // Fills the destination span with pseudorandom values of the given
+  // |element_type|. The given |seed| is passed to the pseudorandom generator.
+  // The pseudorandom values are reproducible both across runs and across
+  // machines.
+  StatusOr<vm::ref<iree_hal_buffer_view_t>> GenerateRandom4dTensor(
+      const vm::ref<iree_hal_device_t> device, int64_t dim0, int64_t dim1,
+      int64_t dim2, int64_t dim3, iree_hal_element_type_t element_type,
+      int32_t seed) {
+    iree_hal_dim_t dims[4] = {
+        (iree_hal_dim_t)dim0,
+        (iree_hal_dim_t)dim1,
+        (iree_hal_dim_t)dim2,
+        (iree_hal_dim_t)dim3,
+    };
+    iree_hal_buffer_params_t buffer_params = {0};
+    buffer_params.usage = IREE_HAL_BUFFER_USAGE_DEFAULT;
+    buffer_params.access = IREE_HAL_MEMORY_ACCESS_ALL;
+    buffer_params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE;
+    vm::ref<iree_hal_buffer_view_t> result_view;
+    struct callback_state_t {
+      iree_hal_element_type_t element_type;
+      int32_t seed;
+    } callback_state = {
+        element_type,
+        seed,
+    };
+    IREE_RETURN_IF_ERROR(iree_hal_buffer_view_generate_buffer(
+        device.get(), iree_hal_device_allocator(device.get()),
+        IREE_ARRAYSIZE(dims), dims, element_type,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
+        +[](iree_hal_buffer_mapping_t* mapping, void* user_data) {
+          callback_state_t callback_state = *(callback_state_t*)user_data;
+          iree_byte_span_t span = mapping->contents;
+          // Generate "uniform" integer-valued numbers in the range
+          // [min, max].
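+          // Worked example with hypothetical bounds (the real ones come from
+          // iree_test_utils_get_min_max_for_element_type below, and
+          // iree_test_utils_pseudorandom_range is assumed to yield values in
+          // [0, range)): if min = -8 and max = 7, then
+          // range = (7 - (-8) + 1) / 4 = 4, so every generated value lands
+          // in [min, min + range), i.e. {-8, -7, -6, -5}.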
+          int32_t min = 0;
+          int32_t max = 0;
+          iree_test_utils_get_min_max_for_element_type(
+              callback_state.element_type, &min, &max);
+          // Use only a quarter of the representable test range to keep
+          // numerical behavior stable.
+          uint32_t range = (max - min + 1) / 4;
+          iree_host_size_t element_byte_count =
+              iree_hal_element_dense_byte_count(callback_state.element_type);
+          uint8_t* data_end = span.data + span.data_length;
+          uint32_t state = callback_state.seed;
+          for (uint8_t* data = span.data; data < data_end;
+               data += element_byte_count) {
+            int32_t value =
+                (int32_t)iree_test_utils_pseudorandom_range(&state, range) +
+                min;
+            iree_test_utils_write_element(callback_state.element_type, value,
+                                          data);
+          }
+          return iree_ok_status();
+        },
+        &callback_state, &result_view));
+    return std::move(result_view);
+  }
+
+  Status CheckConv2dResults(
+      const vm::ref<iree_hal_device_t> device, int64_t n, int64_t c, int64_t h,
+      int64_t w, int64_t f, int64_t kh, int64_t kw, int64_t layout, int64_t sh,
+      int64_t sw, int64_t dh, int64_t dw,
+      const vm::ref<iree_hal_buffer_view_t> input,
+      const vm::ref<iree_hal_buffer_view_t> kernel,
+      const vm::ref<iree_hal_buffer_view_t> acc,
+      const vm::ref<iree_hal_buffer_view_t> actual_result) {
+    conv2d_results_t results = {};
+    IREE_RETURN_IF_ERROR(conv2d_results_initialize(
+        device.get(), (iree_hal_dim_t)n, (iree_hal_dim_t)c, (iree_hal_dim_t)h,
+        (iree_hal_dim_t)w, (iree_hal_dim_t)f, (iree_hal_dim_t)kh,
+        (iree_hal_dim_t)kw, (iree_hal_dim_t)layout, (iree_hal_dim_t)sh,
+        (iree_hal_dim_t)sw, (iree_hal_dim_t)dh, (iree_hal_dim_t)dw,
+        input.get(), kernel.get(), acc.get(), actual_result.get(),
+        host_allocator_, &results));
+    iree_status_t status = check_conv2d_results(stderr, &results);
+    conv2d_results_deinitialize(&results);
+    return status;
+  }
+
+ private:
+  iree_allocator_t host_allocator_;
+};
+
+static const vm::NativeFunction<Conv2dTestModuleState>
+    kConv2dTestModuleFunctions[] = {
+        vm::MakeNativeFunction("generate_random_tensor",
+                               &Conv2dTestModuleState::GenerateRandom4dTensor),
+        vm::MakeNativeFunction("check_conv2d_results",
+                               &Conv2dTestModuleState::CheckConv2dResults),
+};
+
+struct Conv2dTestModule final
+    : public vm::NativeModule<Conv2dTestModuleState> {
+  using vm::NativeModule<Conv2dTestModuleState>::NativeModule;
+  StatusOr<std::unique_ptr<Conv2dTestModuleState>> CreateState(
+      iree_allocator_t host_allocator) override {
+    return std::make_unique<Conv2dTestModuleState>(host_allocator);
+  }
+};
+
+}  // namespace iree
+
+static iree_status_t conv2d_test_module_create(iree_vm_instance_t* instance,
+                                               iree_allocator_t host_allocator,
+                                               iree_vm_module_t** out_module) {
+  IREE_ASSERT_ARGUMENT(out_module);
+  *out_module = NULL;
+  auto module = std::make_unique<iree::Conv2dTestModule>(
+      "conv2d_test", /*version=*/0, instance, host_allocator,
+      iree::span<const iree::vm::NativeFunction<iree::Conv2dTestModuleState>>(
+          iree::kConv2dTestModuleFunctions));
+  *out_module = module.release()->interface();
+  return iree_ok_status();
+}
+
+int main(int argc, char** argv) {
+  IREE_TRACE_APP_ENTER();
+
+  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
+  if (argc != 1) {
+    fprintf(stderr, "use --module= flags to specify the modules to run\n");
+    IREE_TRACE_APP_EXIT(EXIT_FAILURE);
+    return EXIT_FAILURE;
+  }
+
+  iree_status_t status = iree_test_utils_load_and_run_e2e_tests(
+      iree_allocator_system(), conv2d_test_module_create);
+  int exit_code = EXIT_SUCCESS;
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    bool is_unavailable = iree_status_is_unavailable(status);
+    iree_status_free(status);
+    exit_code = is_unavailable ?
EXIT_SUCCESS : EXIT_FAILURE; + } + + IREE_TRACE_APP_EXIT(exit_code); + return exit_code; +} \ No newline at end of file diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index 8b8aecd..659ea2c 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -193,10 +193,8 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, // expected values. Inexact results are only permitted when the // `require_exact_results` flag is set to `false`. case IREE_TEST_UTILS_VALUE_TYPE_F16: - if (actual.f16_u16 == expected.f16_u16) return true; - if (iree_test_utils_max_elements_to_check()) return false; - return fabsf(iree_math_f16_to_f32(actual.f16_u16) - - iree_math_f16_to_f32(expected.f16_u16)) < + if (actual.f16 == expected.f16) return true; + return fabsf((actual.f16) - (expected.f16)) < acceptable_fp_delta; case IREE_TEST_UTILS_VALUE_TYPE_BF16: if (actual.bf16_u16 == expected.bf16_u16) return true; diff --git a/linalg_ops/test_utils.h b/linalg_ops/test_utils.h index 626097b..f86986b 100644 --- a/linalg_ops/test_utils.h +++ b/linalg_ops/test_utils.h @@ -62,6 +62,7 @@ typedef struct iree_test_utils_value_t { int16_t i16; int32_t i32; int64_t i64; + float f16; float f32; uint16_t f16_u16; uint16_t bf16_u16; From 89f8ac623ef7c01262d1223cdb07ab41b5e684c8 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 00:51:35 -0500 Subject: [PATCH 02/16] Add formatting Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 39 +-------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index e9a57b5..db18238 100644 --- a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -72,41 +72,4 @@ foreach(_DTYPE IN LISTS _DTYPES) "default" ) endforeach() -endforeach() - -############################################################################### -# -# GPU - ROCm/HIP, default flags. 
-# -############################################################################### - -# if(IREE_HIP_TEST_TARGET_CHIP) - -# set(_DTYPES) -# list(APPEND _DTYPES "f16_f16_f16") -# list(APPEND _DTYPES "f32_f32_f32") - -# foreach(_DTYPE IN LISTS _DTYPES) -# foreach(_SIZE IN LISTS _SIZES) -# iree_test_suites_runner_test( -# NAME -# matmul_hip_${_DTYPE}_${_SIZE} -# TESTS_SRC -# "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}.mlir" -# CALLS_SRC -# "generated/${_DTYPE}/matmul_${_DTYPE}_${_SIZE}_calls.mlir" -# TEST_RUNNER -# iree-test-suites_iree-e2e-matmul-test -# TARGET_BACKEND -# "rocm" -# DRIVER -# "hip" -# COMPILER_FLAGS -# "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" -# RUNNER_FLAGS -# LABELS -# ) -# endforeach() -# endforeach() - -# endif() \ No newline at end of file +endforeach() \ No newline at end of file From 5323f0a39a63ad251317ce7088dd792819324802 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 01:29:17 -0500 Subject: [PATCH 03/16] Add formatting Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 2 +- linalg_ops/convolution/generate_e2e_conv2d_tests.py | 2 +- linalg_ops/convolution/generate_test_mlir_files.sh | 6 ------ linalg_ops/iree-e2e-conv2d-test.cc | 4 ++-- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index db18238..5b5f32e 100644 --- a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -72,4 +72,4 @@ foreach(_DTYPE IN LISTS _DTYPES) "default" ) endforeach() -endforeach() \ No newline at end of file +endforeach() diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py index a492eb4..fdb6037 100644 --- a/linalg_ops/convolution/generate_e2e_conv2d_tests.py +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -690,4 +690,4 @@ def main(args): if __name__ == "__main__": - main(parse_arguments()) \ No newline at end of file + main(parse_arguments()) diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index 35c4a7a..e742257 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -97,9 +97,3 @@ for type_combination in ${type_combinations[@]}; do --shapes=${shape} done done - -# input_type;kernel_type;acc_type -type_combinations=( - "f16;f16;f16" - "f32;f32;f32" -) \ No newline at end of file diff --git a/linalg_ops/iree-e2e-conv2d-test.cc b/linalg_ops/iree-e2e-conv2d-test.cc index 686fb4e..6bc2565 100644 --- a/linalg_ops/iree-e2e-conv2d-test.cc +++ b/linalg_ops/iree-e2e-conv2d-test.cc @@ -18,7 +18,7 @@ #include "iree/tooling/device_util.h" #include "iree/vm/api.h" #include "iree/vm/native_module_cc.h" -#include "tools/testing/e2e/test_utils.h" +#include "test_utils.h" //===----------------------------------------------------------------------===// // Reference conv2d (NCHW-FCHW) and (NHWC-HWCF) @@ -772,4 +772,4 @@ int main(int argc, char** argv) { IREE_TRACE_APP_EXIT(exit_code); return exit_code; -} \ No newline at end of file +} From 02407acd03722f24e10019c5398ac1ea5f93e6cd Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 01:34:36 -0500 Subject: [PATCH 04/16] Add formatting Signed-off-by: erman-gurses --- linalg_ops/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linalg_ops/CMakeLists.txt b/linalg_ops/CMakeLists.txt index c6a14aa..015b318 100644 --- 
a/linalg_ops/CMakeLists.txt +++ b/linalg_ops/CMakeLists.txt @@ -143,4 +143,4 @@ include(iree_test_suites_native_test) include(iree_test_suites_runner_test) add_subdirectory(matmul) -add_subdirectory(convolution) \ No newline at end of file +add_subdirectory(convolution) From f3573407618db561d3674348b5aff03878cf7834 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 16:35:12 -0500 Subject: [PATCH 05/16] Remove winograd specific .mlir code generation Signed-off-by: erman-gurses --- .../convolution/generate_test_mlir_files.sh | 9 - .../conv2d_winograd_f16_f16_f16_large.mlir | 10 -- ...nv2d_winograd_f16_f16_f16_large_calls.mlir | 112 ------------ .../conv2d_winograd_f16_f16_f16_medium.mlir | 15 -- ...v2d_winograd_f16_f16_f16_medium_calls.mlir | 163 ------------------ .../conv2d_winograd_f16_f16_f16_small.mlir | 15 -- ...nv2d_winograd_f16_f16_f16_small_calls.mlir | 163 ------------------ .../conv2d_winograd_f32_f32_f32_large.mlir | 10 -- ...nv2d_winograd_f32_f32_f32_large_calls.mlir | 112 ------------ .../conv2d_winograd_f32_f32_f32_medium.mlir | 15 -- ...v2d_winograd_f32_f32_f32_medium_calls.mlir | 163 ------------------ .../conv2d_winograd_f32_f32_f32_small.mlir | 15 -- ...nv2d_winograd_f32_f32_f32_small_calls.mlir | 163 ------------------ 13 files changed, 965 deletions(-) delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir delete mode 100644 linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir delete mode 100644 linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index e742257..dc3581a 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -86,14 +86,5 @@ for type_combination in ${type_combinations[@]}; do --kernel_type=${kernel_type} \ --acc_type=${acc_type} \ --shapes=${shape} - - name="conv2d_winograd_${type_name}_${shape}" - python ${this_dir}/generate_e2e_conv2d_tests.py \ - --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ - --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ - --input_type=${input_type} \ - --kernel_type=${kernel_type} \ - --acc_type=${acc_type} \ - --shapes=${shape} done done diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir deleted file mode 100644 index 
51c8a1e..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large.mlir +++ /dev/null @@ -1,10 +0,0 @@ -func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> - return %result: tensor<2x8x126x126xf16> -} - -func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> - return %result: tensor<2x12x126x126xf16> -} - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir deleted file mode 100644 index 7dfb92f..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_large_calls.mlir +++ /dev/null @@ -1,112 +0,0 @@ -builtin.module @calls attributes { - -} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 4 : i64 - %input_dim2 = arith.constant 128 : i64 - %input_dim3 = arith.constant 128 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 8 : i64 - %kernel_dim1 = arith.constant 4 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : 
(!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 8 : i64 - %acc_dim2 = arith.constant 126 : i64 - %acc_dim3 = arith.constant 126 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 8 : i64 - %acc_copy_dim2 = arith.constant 126 : i64 - %acc_copy_dim3 = arith.constant 126 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 4 : i64 - %h = arith.constant 128 : i64 - %w = arith.constant 128 : i64 - %f = arith.constant 8 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 3 : i64 - %input_dim2 = arith.constant 128 : i64 - %input_dim3 = arith.constant 128 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 12 : i64 - %kernel_dim1 = arith.constant 3 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 12 : i64 - %acc_dim2 = arith.constant 126 : i64 - %acc_dim3 = arith.constant 126 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 
= arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 12 : i64 - %acc_copy_dim2 = arith.constant 126 : i64 - %acc_copy_dim3 = arith.constant 126 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 3 : i64 - %h = arith.constant 128 : i64 - %w = arith.constant 128 : i64 - %f = arith.constant 12 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir deleted file mode 100644 index a2564aa..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium.mlir +++ /dev/null @@ -1,15 +0,0 @@ -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> - return %result: tensor<2x2x30x30xf16> -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> - return %result: tensor<2x64x30x30xf16> -} - -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> - return %result: tensor<2x64x30x30xf16> -} - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir deleted file mode 100644 index c6e86d7..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_medium_calls.mlir +++ /dev/null @@ -1,163 +0,0 @@ -builtin.module @calls attributes { - 
-} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 2 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 2 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 2 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 2 : i64 - %kh = arith.constant 3 : i64 
- %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 64 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 64 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 64 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func 
@conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 32 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 8 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 32 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 9 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 64 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 10 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 64 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 10 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 64 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir deleted file mode 100644 index ddbe425..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small.mlir +++ /dev/null @@ -1,15 +0,0 @@ -func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> 
{ - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> - return %result: tensor<1x1x1x1xf16> -} - -func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> - return %result: tensor<1x1x15x15xf16> -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> - return %result: tensor<2x2x30x30xf16> -} - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir deleted file mode 100644 index 872c618..0000000 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_winograd_f16_f16_f16_small_calls.mlir +++ /dev/null @@ -1,163 +0,0 @@ -builtin.module @calls attributes { - -} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 1 : i64 - %input_dim1 = arith.constant 1 : i64 - %input_dim2 = arith.constant 1 : i64 - %input_dim3 = arith.constant 1 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 1 : i64 - %kernel_dim1 = arith.constant 1 : i64 - %kernel_dim2 = arith.constant 1 : i64 - %kernel_dim3 = arith.constant 1 
: i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 1 : i64 - %acc_dim1 = arith.constant 1 : i64 - %acc_dim2 = arith.constant 1 : i64 - %acc_dim3 = arith.constant 1 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 1 : i64 - %acc_copy_dim1 = arith.constant 1 : i64 - %acc_copy_dim2 = arith.constant 1 : i64 - %acc_copy_dim3 = arith.constant 1 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 1 : i64 - %c = arith.constant 1 : i64 - %h = arith.constant 1 : i64 - %w = arith.constant 1 : i64 - %f = arith.constant 1 : i64 - %kh = arith.constant 1 : i64 - %kw = arith.constant 1 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 1 : i64 - %input_dim1 = arith.constant 1 : i64 - %input_dim2 = arith.constant 16 : i64 - %input_dim3 = arith.constant 16 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 1 : i64 - %kernel_dim1 = arith.constant 1 : i64 - %kernel_dim2 = arith.constant 2 : i64 - %kernel_dim3 = arith.constant 2 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 1 : i64 - %acc_dim1 = arith.constant 1 : i64 - %acc_dim2 = arith.constant 15 : i64 - %acc_dim3 = arith.constant 15 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : 
i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 1 : i64 - %acc_copy_dim1 = arith.constant 1 : i64 - %acc_copy_dim2 = arith.constant 15 : i64 - %acc_copy_dim3 = arith.constant 15 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 1 : i64 - %c = arith.constant 1 : i64 - %h = arith.constant 16 : i64 - %w = arith.constant 16 : i64 - %f = arith.constant 1 : i64 - %kh = arith.constant 2 : i64 - %kw = arith.constant 2 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 8 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 2 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 9 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 2 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 10 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 2 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 10 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, 
%acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 2 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir deleted file mode 100644 index a47185c..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large.mlir +++ /dev/null @@ -1,10 +0,0 @@ -func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x4x128x128xf32>, %rhs: tensor<8x4x3x3xf32>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf32>, tensor<8x4x3x3xf32>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> - return %result: tensor<2x8x126x126xf32> -} - -func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x3x128x128xf32>, %rhs: tensor<12x3x3x3xf32>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf32>, tensor<12x3x3x3xf32>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> - return %result: tensor<2x12x126x126xf32> -} - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir deleted file mode 100644 index cdd2788..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_large_calls.mlir +++ /dev/null @@ -1,112 +0,0 @@ -builtin.module @calls attributes { - -} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: 
!hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 4 : i64 - %input_dim2 = arith.constant 128 : i64 - %input_dim3 = arith.constant 128 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 8 : i64 - %kernel_dim1 = arith.constant 4 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 8 : i64 - %acc_dim2 = arith.constant 126 : i64 - %acc_dim3 = arith.constant 126 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 8 : i64 - %acc_copy_dim2 = arith.constant 126 : i64 - %acc_copy_dim3 = arith.constant 126 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 4 : i64 - %h = arith.constant 128 : i64 - %w = arith.constant 128 : i64 - %f = arith.constant 8 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 3 : i64 - %input_dim2 = arith.constant 128 : i64 - 
%input_dim3 = arith.constant 128 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 12 : i64 - %kernel_dim1 = arith.constant 3 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 12 : i64 - %acc_dim2 = arith.constant 126 : i64 - %acc_dim3 = arith.constant 126 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 12 : i64 - %acc_copy_dim2 = arith.constant 126 : i64 - %acc_copy_dim3 = arith.constant 126 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 3 : i64 - %h = arith.constant 128 : i64 - %w = arith.constant 128 : i64 - %f = arith.constant 12 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir deleted file mode 100644 index e0a0376..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium.mlir +++ /dev/null @@ -1,15 +0,0 @@ -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> - return %result: tensor<2x2x30x30xf32> -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: 
tensor<64x2x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<64x2x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> - return %result: tensor<2x64x30x30xf32> -} - -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x32xf32>, %rhs: tensor<64x32x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf32>, tensor<64x32x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> - return %result: tensor<2x64x30x30xf32> -} - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir deleted file mode 100644 index 3537bc9..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_medium_calls.mlir +++ /dev/null @@ -1,163 +0,0 @@ -builtin.module @calls attributes { - -} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 2 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : 
i64 - %acc_dim1 = arith.constant 2 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 2 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 2 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 64 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 64 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - 
%acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 64 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 32 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 8 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 32 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 9 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 64 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 10 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 64 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 10 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 64 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir deleted file mode 100644 index 9ecd2bd..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small.mlir +++ /dev/null @@ -1,15 +0,0 @@ -func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> - return %result: tensor<1x1x1x1xf32> -} - -func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf32>, tensor<1x1x2x2xf32>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> - return %result: tensor<1x1x15x15xf32> -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> - return %result: tensor<2x2x30x30xf32> -} - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir deleted file mode 100644 index 092bd67..0000000 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_winograd_f32_f32_f32_small_calls.mlir +++ /dev/null @@ -1,163 +0,0 @@ -builtin.module @calls attributes { - -} { - -func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view -func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - -func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func 
private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view - -func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 1 : i64 - %input_dim1 = arith.constant 1 : i64 - %input_dim2 = arith.constant 1 : i64 - %input_dim3 = arith.constant 1 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 2 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 1 : i64 - %kernel_dim1 = arith.constant 1 : i64 - %kernel_dim2 = arith.constant 1 : i64 - %kernel_dim3 = arith.constant 1 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 3 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 1 : i64 - %acc_dim1 = arith.constant 1 : i64 - %acc_dim2 = arith.constant 1 : i64 - %acc_dim3 = arith.constant 1 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 4 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 1 : i64 - %acc_copy_dim1 = arith.constant 1 : i64 - %acc_copy_dim2 = arith.constant 1 : i64 - %acc_copy_dim3 = arith.constant 1 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 4 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 1 : i64 - %c = arith.constant 1 : i64 - %h = arith.constant 1 : i64 - %w = arith.constant 1 : i64 - %f = arith.constant 1 : i64 - %kh = arith.constant 1 : i64 - %kw = arith.constant 1 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} -} 
{ - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 1 : i64 - %input_dim1 = arith.constant 1 : i64 - %input_dim2 = arith.constant 16 : i64 - %input_dim3 = arith.constant 16 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 5 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 1 : i64 - %kernel_dim1 = arith.constant 1 : i64 - %kernel_dim2 = arith.constant 2 : i64 - %kernel_dim3 = arith.constant 2 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 6 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 1 : i64 - %acc_dim1 = arith.constant 1 : i64 - %acc_dim2 = arith.constant 15 : i64 - %acc_dim3 = arith.constant 15 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 7 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 1 : i64 - %acc_copy_dim1 = arith.constant 1 : i64 - %acc_copy_dim2 = arith.constant 15 : i64 - %acc_copy_dim3 = arith.constant 15 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 7 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 1 : i64 - %c = arith.constant 1 : i64 - %h = arith.constant 16 : i64 - %w = arith.constant 16 : i64 - %f = arith.constant 1 : i64 - %kh = arith.constant 2 : i64 - %kw = arith.constant 2 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - -func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} -} { - %device_index = arith.constant 0 : index - %device = hal.devices.get %device_index : !hal.device - %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 2 : i64 - %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 - %input_element_type = hal.element_type : i32 - %input_seed = arith.constant 8 : i32 - %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, 
i32) -> !hal.buffer_view - %kernel_dim0 = arith.constant 2 : i64 - %kernel_dim1 = arith.constant 2 : i64 - %kernel_dim2 = arith.constant 3 : i64 - %kernel_dim3 = arith.constant 3 : i64 - %kernel_element_type = hal.element_type : i32 - %kernel_seed = arith.constant 9 : i32 - %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_dim0 = arith.constant 2 : i64 - %acc_dim1 = arith.constant 2 : i64 - %acc_dim2 = arith.constant 30 : i64 - %acc_dim3 = arith.constant 30 : i64 - %acc_element_type = hal.element_type : i32 - %acc_seed = arith.constant 10 : i32 - %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %acc_copy_dim0 = arith.constant 2 : i64 - %acc_copy_dim1 = arith.constant 2 : i64 - %acc_copy_dim2 = arith.constant 30 : i64 - %acc_copy_dim3 = arith.constant 30 : i64 - %acc_copy_element_type = hal.element_type : i32 - %acc_copy_seed = arith.constant 10 : i32 - %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view - %n = arith.constant 2 : i64 - %c = arith.constant 2 : i64 - %h = arith.constant 32 : i64 - %w = arith.constant 32 : i64 - %f = arith.constant 2 : i64 - %kh = arith.constant 3 : i64 - %kw = arith.constant 3 : i64 - %layout = arith.constant 0 : i64 - %sh = arith.constant 1 : i64 - %sw = arith.constant 1 : i64 - %dh = arith.constant 1 : i64 - %dw = arith.constant 1 : i64 - call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () - return -} - - -} From 66a927c3656abedbb757a8b17f62596b5dc59ade Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Mon, 21 Oct 2024 16:38:59 -0500 Subject: [PATCH 06/16] Remove unnecessary comments Signed-off-by: erman-gurses --- linalg_ops/convolution/generate_test_mlir_files.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index dc3581a..9137999 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -20,12 +20,6 @@ # conv2d_f16_f16_f16_medium.mlir # conv2d_f16_f16_f16_small_calls.mlir # conv2d_f16_f16_f16_small.mlir -# conv2d_winograd_f16_f16_f16_large_calls.mlir -# conv2d_winograd_f16_f16_f16_large.mlir -# conv2d_winograd_f16_f16_f16_medium_calls.mlir -# conv2d_winograd_f16_f16_f16_medium.mlir -# conv2d_winograd_f16_f16_f16_small_calls.mlir -# conv2d_winograd_f16_f16_f16_small.mlir # f32_f32_f32/ # conv2d_f32_f32_f32_large_calls.mlir # conv2d_f32_f32_f32_large.mlir @@ -33,12 +27,6 @@ # conv2d_f32_f32_f32_medium.mlir # conv2d_f32_f32_f32_small_calls.mlir # conv2d_f32_f32_f32_small.mlir -# conv2d_winograd_f32_f32_f32_large_calls.mlir -# 
conv2d_winograd_f32_f32_f32_large.mlir -# conv2d_winograd_f32_f32_f32_medium_calls.mlir -# conv2d_winograd_f32_f32_f32_medium.mlir -# conv2d_winograd_f32_f32_f32_small_calls.mlir -# conv2d_winograd_f32_f32_f32_small.mlir # ... # ... # Usage: From 8d63db70535ac402371bf3af391cab23a27d7e9c Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 23 Oct 2024 17:16:49 -0500 Subject: [PATCH 07/16] Addressing multiple comments Signed-off-by: erman-gurses --- .../convolution/generate_e2e_conv2d_tests.py | 32 ++-- .../f16_f16_f16/conv2d_f16_f16_f16_large.mlir | 2 - .../conv2d_f16_f16_f16_large_calls.mlir | 4 - .../conv2d_f16_f16_f16_medium.mlir | 3 - .../conv2d_f16_f16_f16_medium_calls.mlir | 5 - .../f16_f16_f16/conv2d_f16_f16_f16_small.mlir | 3 - .../conv2d_f16_f16_f16_small_calls.mlir | 5 - .../f32_f32_f32/conv2d_f32_f32_f32_large.mlir | 2 - .../conv2d_f32_f32_f32_large_calls.mlir | 4 - .../conv2d_f32_f32_f32_medium.mlir | 3 - .../conv2d_f32_f32_f32_medium_calls.mlir | 5 - .../f32_f32_f32/conv2d_f32_f32_f32_small.mlir | 3 - .../conv2d_f32_f32_f32_small_calls.mlir | 5 - linalg_ops/iree-e2e-conv2d-test.cc | 166 +++++++++--------- 14 files changed, 104 insertions(+), 138 deletions(-) diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py index fdb6037..491b48b 100644 --- a/linalg_ops/convolution/generate_e2e_conv2d_tests.py +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -16,13 +16,14 @@ import itertools import re + # Data type of kernel entries. The string values must match MLIR data types. @enum.unique class KernelElemTypeId(enum.Enum): NONE = "" I8 = "i8" - F32 = "f32" F16 = "f16" + F32 = "f32" # Data type of input entries. The string values must match MLIR data types. @@ -30,8 +31,8 @@ class KernelElemTypeId(enum.Enum): class InputElemTypeId(enum.Enum): NONE = "" I8 = "i8" - F32 = "f32" F16 = "f16" + F32 = "f32" # Data type of input entries. The string values must match MLIR data types. @@ -42,6 +43,7 @@ class AccElemTypeId(enum.Enum): F32 = "f32" F16 = "f16" + # Enumerates of the collections of shapes that we can generate tests for. # The values are the accepted values for the --shapes= flag. 
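# The accumulator shapes used throughout the generated tests follow the
# standard VALID-padding (no-padding) convolution size arithmetic: a 16x16
# input with a 2x2 kernel yields 15x15, 32x32 with a 3x3 kernel yields 30x30,
# and 128x128 with a 3x3 kernel yields 126x126. A minimal sketch of that
# calculation follows; the helper name is illustrative and not part of this
# generator:
def conv2d_out_dim(in_dim: int, kernel_dim: int, stride: int = 1, dilation: int = 1) -> int:
    # Effective kernel extent grows with dilation; stride divides what remains.
    effective_kernel = dilation * (kernel_dim - 1) + 1
    return (in_dim - effective_kernel) // stride + 1

assert conv2d_out_dim(16, 2) == 15    # small:  1x1x16x16  -> 1x1x15x15
assert conv2d_out_dim(32, 3) == 30    # medium: 2x2x32x32  -> 2x2x30x30
assert conv2d_out_dim(128, 3) == 126  # large:  2x4x128x128 -> 2x8x126x126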
@enum.unique @@ -116,9 +118,9 @@ def get_test_shapes(shapes_id: ShapesId): ] if shapes_id == ShapesId.MEDIUM: return [ - TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), + TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), - TestShape(n=2, h=32, w=32, c=32, kh=3, kw=3, f=64, accumulate=True), + TestShape(n=2, c=32, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), ] if shapes_id == ShapesId.LARGE: return [ @@ -342,7 +344,7 @@ def generate_function( f"func.func @{func_name}(%lhs: {input_tensor_type}, %rhs: {kernel_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" f" %result = {op_name} {conv_attr} ins(%lhs, %rhs: {input_tensor_type}, {kernel_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" f" return %result: {acc_tensor_type}\n" - f"}}\n" + f"}}" ) return MLIRFunction( @@ -428,7 +430,7 @@ def generate_call( raise ValueError(kernel_layout) else: raise ValueError(InputLayout) - + description = f"Conv2d shape (NxCxHxWxFxKHxKW): {shape.n}x{shape.c}x{shape.h}x{shape.w}x{shape.f}x{shape.kh}x{shape.kw}" op = ( f"func.func @{func_name}() attributes {{\n" @@ -476,7 +478,8 @@ def generate_call( f" %sw = arith.constant {conv2d_attr.STRIDE[1]} : i64\n" f" %dh = arith.constant {conv2d_attr.DILATION[0]} : i64\n" f" %dw = arith.constant {conv2d_attr.DILATION[1]} : i64\n" - f" call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n") + f" call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n" + ) op = op + " return\n" op = op + "}\n" @@ -621,7 +624,12 @@ def write_code_file(functions, filename): def write_calls_file(functions, calls, filename, requirements): + # TODO(scotttodd): write "GENERATED BY" comment to the top of the file + # Module-level reflection information used to control the test tool. + # TODO(scotttodd): drop this and whatever logic in the test tool used it + # multiple backends should be able to use the same input IR, so the + # input IR shouldn't need things like CPU features in it reflection = "" if requirements: reflection = ( @@ -638,9 +646,7 @@ def write_calls_file(functions, calls, filename, requirements): # Declare the custom module that generates arguments. 
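# As the generated *_calls.mlir files show, each test case draws consecutive
# pseudorandom seeds: the input, kernel, and accumulator each advance the seed
# by one, and acc_copy reuses the accumulator's seed so the two buffers start
# out identical (the copy is passed to the conv2d function under test, leaving
# the original acc untouched for check_conv2d_results). A sketch of that
# progression, assuming a per-file counter that starts at 2 (labels here are
# illustrative):
seed = 2
for test_case in ["small_0", "small_1", "small_2"]:
    input_seed = seed         # 2, 5, 8, ...
    kernel_seed = seed + 1    # 3, 6, 9, ...
    acc_seed = seed + 2       # 4, 7, 10, ...
    acc_copy_seed = acc_seed  # same seed => same data as acc by construction
    seed += 3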
module_definition = module_definition + ( "func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n" - "func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n" - "\n" - + "func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n" ) # Declare the functions that will be called. @@ -650,9 +656,9 @@ def write_calls_file(functions, calls, filename, requirements): # Emit the test cases for each call. for call in calls: - module_definition = module_definition + call.op + "\n" + module_definition = module_definition + call.op + "" - module_definition = module_definition + "\n}\n" + module_definition = module_definition + "}\n" with open(filename, "w") as file: file.write(module_definition) @@ -663,7 +669,7 @@ def main(args): input_layout = InputLayout(args.input_layout) kernel_type = KernelElemTypeId(args.kernel_type) kernel_layout = KernelLayout(args.kernel_layout) - acc_type = AccElemTypeId(args.acc_type) + acc_type = AccElemTypeId(args.acc_type) shapes_id = ShapesId(args.shapes) conv2d_attr = ConvAttrs( tuple(map(int, args.stride.split(","))), diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir index 51c8a1e..ca13bae 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir @@ -2,9 +2,7 @@ func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: ten %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> return %result: tensor<2x8x126x126xf16> } - func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> return %result: tensor<2x12x126x126xf16> } - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir index 7dfb92f..ee31f04 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private 
@conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view @@ -57,7 +56,6 @@ func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} } { @@ -107,6 +105,4 @@ func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir index a2564aa..caba912 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir @@ -2,14 +2,11 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tenso %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> return %result: tensor<2x2x30x30xf16> } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> return %result: tensor<2x64x30x30xf16> } - func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> return %result: tensor<2x64x30x30xf16> } - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir index c6e86d7..60860a5 
100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view @@ -58,7 +57,6 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} } { @@ -108,7 +106,6 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_32_64_3_3_acc_2() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} } { @@ -158,6 +155,4 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_3 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir index ddbe425..66fe7fd 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir @@ -2,14 +2,11 @@ func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor< %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> 
tensor<1x1x1x1xf16> return %result: tensor<1x1x1x1xf16> } - func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> return %result: tensor<1x1x15x15xf16> } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> return %result: tensor<2x2x30x30xf16> } - diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir index 872c618..98438c6 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir +++ b/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view @@ -58,7 +57,6 @@ func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} } { @@ -108,7 +106,6 @@ func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes { iree.reflection 
= {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} } { @@ -158,6 +155,4 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir index a47185c..1714e5b 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir @@ -2,9 +2,7 @@ func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: ten %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf32>, tensor<8x4x3x3xf32>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> return %result: tensor<2x8x126x126xf32> } - func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x3x128x128xf32>, %rhs: tensor<12x3x3x3xf32>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf32>, tensor<12x3x3x3xf32>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> return %result: tensor<2x12x126x126xf32> } - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir index cdd2788..ce81bc5 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view @@ -57,7 +56,6 @@ func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { iree.reflection = {description = "Conv2d shape 
(NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} } { @@ -107,6 +105,4 @@ func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir index e0a0376..97ff810 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir @@ -2,14 +2,11 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tenso %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> return %result: tensor<2x2x30x30xf32> } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<64x2x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<64x2x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } - func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x32xf32>, %rhs: tensor<64x32x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf32>, tensor<64x32x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir index 3537bc9..3a2f05c 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, 
%acc: !hal.buffer_view) -> !hal.buffer_view @@ -58,7 +57,6 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} } { @@ -108,7 +106,6 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_32_64_3_3_acc_2() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} } { @@ -158,6 +155,4 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_3 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir index 9ecd2bd..a4a08ad 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir @@ -2,14 +2,11 @@ func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor< %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> return %result: tensor<1x1x1x1xf32> } - func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf32>, tensor<1x1x2x2xf32>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> return %result: tensor<1x1x15x15xf32> } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> return %result: tensor<2x2x30x30xf32> } - diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir index 092bd67..9f01130 100644 --- 
a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir +++ b/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir @@ -4,7 +4,6 @@ builtin.module @calls attributes { func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) - func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view @@ -58,7 +57,6 @@ func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} } { @@ -108,7 +106,6 @@ func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} } { @@ -158,6 +155,4 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } - - } diff --git a/linalg_ops/iree-e2e-conv2d-test.cc b/linalg_ops/iree-e2e-conv2d-test.cc index 6bc2565..2d1e986 100644 --- a/linalg_ops/iree-e2e-conv2d-test.cc +++ b/linalg_ops/iree-e2e-conv2d-test.cc @@ -38,12 +38,11 @@ static void reference_conv2d_f16_f16_f16_f16( iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t* input_data, - const uint16_t* kernel_data, const uint16_t* acc_data, - uint16_t* result_data, iree_hal_dim_t n, iree_hal_dim_t oc, + iree_hal_dim_t oh_size, iree_hal_dim_t 
ow_size, const uint16_t *input_data, + const uint16_t *kernel_data, const uint16_t *acc_data, + uint16_t *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { if (layout == 0) { - //printf("layout == 0\n"); // The layout of output tensor is NxfxOHxOW iree_hal_dim_t out_idx = convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); @@ -66,7 +65,6 @@ static void reference_conv2d_f16_f16_f16_f16( result_data[out_idx] = iree_math_f32_to_f16(acc); } } else if (layout == 1) { - //printf("layout == 1\n"); // The layout of output tensor is NxOHxOWxf iree_hal_dim_t out_idx = convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); @@ -97,8 +95,8 @@ static void reference_conv2d_f16_f16_f32_f32( iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t* input_data, - const uint16_t* kernel_data, const float* acc_data, float* result_data, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t *input_data, + const uint16_t *kernel_data, const float *acc_data, float *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { if (layout == 0) { // The layout of output tensor is NxfxOHxOW @@ -153,8 +151,8 @@ static void reference_conv2d_i8_i8_i32_i32( iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const int8_t* input_data, - const int8_t* kernel_data, const int32_t* acc_data, int32_t* result_data, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const int8_t *input_data, + const int8_t *kernel_data, const int32_t *acc_data, int32_t *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { if (layout == 0) { // The layout of output tensor is NxfxOHxOW @@ -210,8 +208,8 @@ static void reference_conv2d_f32_f32_f32_f32( iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const float* input_data, - const float* kernel_data, const float* acc_data, float* result_data, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const float *input_data, + const float *kernel_data, const float *acc_data, float *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { if (layout == 0) { // The layout of output tensor is NxfxOHxOW @@ -265,8 +263,8 @@ static iree_status_t reference_conv2d_element( iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type, - iree_hal_element_type_t acc_type, void* input_data, void* kernel_data, - void* acc_data, void* result_data, iree_hal_dim_t n, iree_hal_dim_t oc, + iree_hal_element_type_t acc_type, void *input_data, void *kernel_data, + void *acc_data, void *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && @@ -274,35 +272,38 @@ static 
iree_status_t reference_conv2d_element( reference_conv2d_f32_f32_f32_f32( n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, - (const float*)input_data, (const float*)kernel_data, - (const float*)acc_data, (float*)result_data, n, oc, oh, ow); + (const float *)input_data, (const float *)kernel_data, + (const float *)acc_data, (float *)result_data, n, oc, oh, ow); } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) { reference_conv2d_f16_f16_f16_f16( n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, - (const uint16_t*)input_data, (const uint16_t*)kernel_data, - (const uint16_t*)acc_data, (uint16_t*)result_data, n, oc, oh, ow); + (const uint16_t *)input_data, (const uint16_t *)kernel_data, + (const uint16_t *)acc_data, (uint16_t *)result_data, n, oc, oh, ow); } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { reference_conv2d_f16_f16_f32_f32( n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, - (const uint16_t*)input_data, (const uint16_t*)kernel_data, - (const float*)acc_data, (float*)result_data, n, oc, oh, ow); + (const uint16_t *)input_data, (const uint16_t *)kernel_data, + (const float *)acc_data, (float *)result_data, n, oc, oh, ow); } else if (input_type == IREE_HAL_ELEMENT_TYPE_INT_8 && kernel_type == IREE_HAL_ELEMENT_TYPE_INT_8 && acc_type == IREE_HAL_ELEMENT_TYPE_INT_32) { reference_conv2d_i8_i8_i32_i32( n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, - (const int8_t*)input_data, (const int8_t*)kernel_data, - (const int32_t*)acc_data, (int32_t*)result_data, n, oc, oh, ow); + (const int8_t *)input_data, (const int8_t *)kernel_data, + (const int32_t *)acc_data, (int32_t *)result_data, n, oc, oh, ow); } else { - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "unhandled combination of element types in conv2d"); + return iree_make_status( + IREE_STATUS_INVALID_ARGUMENT, + "unhandled combination of element types in conv2d input_type: %d," + " kernel_type: %d, acc_type: %d", + input_type, kernel_type, acc_type); } return iree_ok_status(); } @@ -386,18 +387,18 @@ static iree_status_t reference_conv2d( typedef struct { iree_allocator_t host_allocator; - iree_hal_dim_t n; // batch dim - iree_hal_dim_t c; // input channels - iree_hal_dim_t h; // input height - iree_hal_dim_t w; // input width - iree_hal_dim_t f; // output channels - iree_hal_dim_t kh; // kernel height - iree_hal_dim_t kw; // kernel width - iree_hal_dim_t layout; // conv layout, 0 : nchwxfchw (default); 1: nhwcxhwcf - iree_hal_dim_t sh; // stride along height dim - iree_hal_dim_t sw; // stride along width dim - iree_hal_dim_t dh; // dilation along height dim - iree_hal_dim_t dw; // dilation along width dim + iree_hal_dim_t n; // batch dim + iree_hal_dim_t c; // input channels + iree_hal_dim_t h; // input height + iree_hal_dim_t w; // input width + iree_hal_dim_t f; // output channels + iree_hal_dim_t kh; // kernel height + iree_hal_dim_t kw; // kernel width + iree_hal_dim_t layout; // conv layout, 0 : nchwxfchw (default); 1: nhwcxhwcf + iree_hal_dim_t sh; // stride along height dim + iree_hal_dim_t sw; // stride along width dim + iree_hal_dim_t 
dh; // dilation along height dim + iree_hal_dim_t dw; // dilation along width dim iree_hal_element_type_t input_type; iree_hal_element_type_t kernel_type; iree_hal_element_type_t acc_type; @@ -409,17 +410,17 @@ typedef struct { iree_byte_span_t expected_contents; } conv2d_results_t; -static void conv2d_results_deinitialize(conv2d_results_t* results); +static void conv2d_results_deinitialize(conv2d_results_t *results); static iree_status_t conv2d_results_initialize( - iree_hal_device_t* device, iree_hal_dim_t n_size, iree_hal_dim_t c_size, + iree_hal_device_t *device, iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, - iree_hal_dim_t dw_size, iree_hal_buffer_view_t* input, - iree_hal_buffer_view_t* kernel, iree_hal_buffer_view_t* acc, - iree_hal_buffer_view_t* result, iree_allocator_t host_allocator, - conv2d_results_t* out_results) { + iree_hal_dim_t dw_size, iree_hal_buffer_view_t *input, + iree_hal_buffer_view_t *kernel, iree_hal_buffer_view_t *acc, + iree_hal_buffer_view_t *result, iree_allocator_t host_allocator, + conv2d_results_t *out_results) { IREE_TRACE_ZONE_BEGIN(z0); memset(out_results, 0, sizeof(*out_results)); @@ -443,10 +444,10 @@ static iree_status_t conv2d_results_initialize( out_results->acc_type = iree_hal_buffer_view_element_type(acc); out_results->result_type = iree_hal_buffer_view_element_type(result); - iree_hal_buffer_t* input_buffer = iree_hal_buffer_view_buffer(input); - iree_hal_buffer_t* kernel_buffer = iree_hal_buffer_view_buffer(kernel); - iree_hal_buffer_t* acc_buffer = acc ? iree_hal_buffer_view_buffer(acc) : NULL; - iree_hal_buffer_t* result_buffer = iree_hal_buffer_view_buffer(result); + iree_hal_buffer_t *input_buffer = iree_hal_buffer_view_buffer(input); + iree_hal_buffer_t *kernel_buffer = iree_hal_buffer_view_buffer(kernel); + iree_hal_buffer_t *acc_buffer = acc ? 
iree_hal_buffer_view_buffer(acc) : NULL; + iree_hal_buffer_t *result_buffer = iree_hal_buffer_view_buffer(result); iree_status_t status = iree_ok_status(); @@ -455,7 +456,7 @@ static iree_status_t conv2d_results_initialize( iree_hal_buffer_byte_length(input_buffer); status = iree_allocator_malloc(host_allocator, out_results->input_contents.data_length, - (void**)&out_results->input_contents.data); + (void **)&out_results->input_contents.data); } if (iree_status_is_ok(status)) { status = iree_hal_device_transfer_d2h( @@ -469,7 +470,7 @@ static iree_status_t conv2d_results_initialize( iree_hal_buffer_byte_length(kernel_buffer); status = iree_allocator_malloc(host_allocator, out_results->kernel_contents.data_length, - (void**)&out_results->kernel_contents.data); + (void **)&out_results->kernel_contents.data); } if (iree_status_is_ok(status)) { status = iree_hal_device_transfer_d2h( @@ -484,7 +485,7 @@ static iree_status_t conv2d_results_initialize( iree_hal_buffer_byte_length(acc_buffer); status = iree_allocator_malloc(host_allocator, out_results->acc_contents.data_length, - (void**)&out_results->acc_contents.data); + (void **)&out_results->acc_contents.data); } if (iree_status_is_ok(status)) { status = iree_hal_device_transfer_d2h( @@ -499,7 +500,7 @@ static iree_status_t conv2d_results_initialize( iree_hal_buffer_byte_length(result_buffer); status = iree_allocator_malloc(host_allocator, out_results->actual_contents.data_length, - (void**)&out_results->actual_contents.data); + (void **)&out_results->actual_contents.data); } if (iree_status_is_ok(status)) { status = iree_hal_device_transfer_d2h( @@ -513,7 +514,7 @@ static iree_status_t conv2d_results_initialize( iree_hal_buffer_byte_length(result_buffer); status = iree_allocator_malloc( host_allocator, out_results->expected_contents.data_length, - (void**)&out_results->expected_contents.data); + (void **)&out_results->expected_contents.data); } if (!iree_status_is_ok(status)) { @@ -523,7 +524,7 @@ static iree_status_t conv2d_results_initialize( return status; } -static void conv2d_results_deinitialize(conv2d_results_t* results) { +static void conv2d_results_deinitialize(conv2d_results_t *results) { IREE_TRACE_ZONE_BEGIN(z0); iree_allocator_free(results->host_allocator, results->input_contents.data); @@ -541,8 +542,8 @@ static void conv2d_results_deinitialize(conv2d_results_t* results) { // obtained and validated the {n, f, oh, ow}_size values. On error, the first // index is returned where the actual and expected value doesn't match. TODO: // Add detailed logging to |file|. 
-static iree_status_t check_conv2d_results_impl(FILE* file, - const conv2d_results_t* results, +static iree_status_t check_conv2d_results_impl(FILE *file, + const conv2d_results_t *results, int check_every) { IREE_TRACE_ZONE_BEGIN(z0); @@ -566,7 +567,8 @@ static iree_status_t check_conv2d_results_impl(FILE* file, for (iree_hal_dim_t oc = 0; oc < results->f; ++oc) { for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { - if (++count < check_every) continue; + if (++count < check_every) + continue; count = 0; iree_hal_dim_t idx = convert_to_1d_index(results->f, oh_size, ow_size, n, oc, oh, ow); @@ -578,7 +580,8 @@ static iree_status_t check_conv2d_results_impl(FILE* file, idx, results->result_type, results->expected_contents.data); if (!iree_test_utils_result_elements_agree(actual_value, expected_value)) { - printf("HERE: actual_value: %f, actual_value: %f\n", actual_value.f32, expected_value.f32); + printf("HERE: actual_value: %f, actual_value: %f\n", + actual_value.f32, expected_value.f32); fprintf( file, "\n\nerror: the actual and expected result tensors disagree " @@ -601,8 +604,8 @@ static iree_status_t check_conv2d_results_impl(FILE* file, // reference conv2d implementation on the same inputs to check if the output // is correct. On error, the first index is returned where the actual and // expected value doesn't match. TODO: Add detailed logging to |file|. -static iree_status_t check_conv2d_results(FILE* file, - const conv2d_results_t* results) { +static iree_status_t check_conv2d_results(FILE *file, + const conv2d_results_t *results) { IREE_TRACE_ZONE_BEGIN(z0); // TODO: Increase the check every param to reduce the number of comparisons. int check_every = 1; @@ -630,7 +633,7 @@ static iree_status_t check_conv2d_results(FILE* file, namespace iree { class Conv2dTestModuleState final { - public: +public: explicit Conv2dTestModuleState(iree_allocator_t host_allocator) : host_allocator_(host_allocator) {} ~Conv2dTestModuleState() = default; @@ -639,10 +642,10 @@ class Conv2dTestModuleState final { // |element_type|. The given |seed| is passed to the pseudorandom generator. // The pseudorandom values are reproducible both across runs and across // machines. - StatusOr> GenerateRandom4dTensor( - const vm::ref device, int64_t dim0, int64_t dim1, - int64_t dim2, int64_t dim3, iree_hal_element_type_t element_type, - int32_t seed) { + StatusOr> + GenerateRandom4dTensor(const vm::ref device, int64_t dim0, + int64_t dim1, int64_t dim2, int64_t dim3, + iree_hal_element_type_t element_type, int32_t seed) { iree_hal_dim_t dims[4] = { (iree_hal_dim_t)dim0, (iree_hal_dim_t)dim1, @@ -665,8 +668,8 @@ class Conv2dTestModuleState final { device.get(), iree_hal_device_allocator(device.get()), IREE_ARRAYSIZE(dims), dims, element_type, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params, - +[](iree_hal_buffer_mapping_t* mapping, void* user_data) { - callback_state_t callback_state = *(callback_state_t*)user_data; + +[](iree_hal_buffer_mapping_t *mapping, void *user_data) { + callback_state_t callback_state = *(callback_state_t *)user_data; iree_byte_span_t span = mapping->contents; // Generate "uniform" integer-valued numbers in the range [min, // max]. 
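For reference while reading the reference_conv2d_* kernels above: each variant flattens a 4-D coordinate through convert_to_1d_index before touching the host buffers (e.g. convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow) for the NxFxOHxOW output layout). A minimal sketch of the row-major flattening this helper appears to perform; its actual definition lives elsewhere in the test utilities, so treat the signature and semantics shown here as assumptions:

def convert_to_1d_index(d1: int, d2: int, d3: int,
                        i0: int, i1: int, i2: int, i3: int) -> int:
    # Row-major flattening of index (i0, i1, i2, i3) into a buffer whose
    # trailing dimensions are d1 x d2 x d3 (the leading extent is implicit).
    # Assumption: this mirrors the convert_to_1d_index helper used by the
    # reference_conv2d_* kernels; its definition is not part of this patch.
    return ((i0 * d1 + i1) * d2 + i2) * d3 + i3

# Example for the layout == 0 (NxFxOHxOW) output of a 2x8x126x126 result:
assert convert_to_1d_index(8, 126, 126, 0, 0, 0, 5) == 5
assert convert_to_1d_index(8, 126, 126, 0, 1, 0, 0) == 126 * 126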
@@ -678,9 +681,9 @@ class Conv2dTestModuleState final { uint32_t range = (max - min + 1) / 4; iree_host_size_t element_byte_count = iree_hal_element_dense_byte_count(callback_state.element_type); - uint8_t* data_end = span.data + span.data_length; + uint8_t *data_end = span.data + span.data_length; uint32_t state = callback_state.seed; - for (uint8_t* data = span.data; data < data_end; + for (uint8_t *data = span.data; data < data_end; data += element_byte_count) { int32_t value = (int32_t)iree_test_utils_pseudorandom_range(&state, range) + @@ -694,14 +697,15 @@ class Conv2dTestModuleState final { return std::move(result_view); } - Status CheckConv2dResults( - const vm::ref device, int64_t n, int64_t c, int64_t h, - int64_t w, int64_t f, int64_t kh, int64_t kw, int64_t layout, int64_t sh, - int64_t sw, int64_t dh, int64_t dw, - const vm::ref input, - const vm::ref kernel, - const vm::ref acc, - const vm::ref actual_result) { + Status + CheckConv2dResults(const vm::ref device, int64_t n, + int64_t c, int64_t h, int64_t w, int64_t f, int64_t kh, + int64_t kw, int64_t layout, int64_t sh, int64_t sw, + int64_t dh, int64_t dw, + const vm::ref input, + const vm::ref kernel, + const vm::ref acc, + const vm::ref actual_result) { conv2d_results_t results = {}; IREE_RETURN_IF_ERROR(conv2d_results_initialize( device.get(), (iree_hal_dim_t)n, (iree_hal_dim_t)c, (iree_hal_dim_t)h, @@ -715,7 +719,7 @@ class Conv2dTestModuleState final { return status; } - private: +private: iree_allocator_t host_allocator_; }; @@ -729,17 +733,17 @@ static const vm::NativeFunction struct Conv2dTestModule final : public vm::NativeModule { using vm::NativeModule::NativeModule; - StatusOr> CreateState( - iree_allocator_t host_allocator) override { + StatusOr> + CreateState(iree_allocator_t host_allocator) override { return std::make_unique(host_allocator); } }; -} // namespace iree +} // namespace iree -static iree_status_t conv2d_test_module_create(iree_vm_instance_t* instance, +static iree_status_t conv2d_test_module_create(iree_vm_instance_t *instance, iree_allocator_t host_allocator, - iree_vm_module_t** out_module) { + iree_vm_module_t **out_module) { IREE_ASSERT_ARGUMENT(out_module); *out_module = NULL; auto module = std::make_unique( @@ -750,7 +754,7 @@ static iree_status_t conv2d_test_module_create(iree_vm_instance_t* instance, return iree_ok_status(); } -int main(int argc, char** argv) { +int main(int argc, char **argv) { IREE_TRACE_APP_ENTER(); iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv); From 5d518b3e2591434451adf2c84ec7a6fb66f89c9b Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 23 Oct 2024 17:20:39 -0500 Subject: [PATCH 08/16] Data type increasing order Signed-off-by: erman-gurses --- linalg_ops/convolution/generate_e2e_conv2d_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py index 491b48b..92e3c1a 100644 --- a/linalg_ops/convolution/generate_e2e_conv2d_tests.py +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -39,9 +39,9 @@ class InputElemTypeId(enum.Enum): @enum.unique class AccElemTypeId(enum.Enum): NONE = "" + F16 = "f16" I32 = "i32" F32 = "f32" - F16 = "f16" # Enumerates of the collections of shapes that we can generate tests for. 
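A note on the generated shapes above (16x16 input with a 2x2 kernel yields 15x15, 32x32 with 3x3 yields 30x30, 128x128 with 3x3 yields 126x126): all of them follow the standard unpadded ("valid") convolution output-size formula. A minimal sketch, assuming stride and dilation of 1 as these tests set them:

def conv2d_output_size(in_size: int, k_size: int,
                       stride: int = 1, dilation: int = 1) -> int:
    # Unpadded convolution: the effective kernel extent is
    # (k_size - 1) * dilation + 1, and the window advances by `stride`.
    effective_k = (k_size - 1) * dilation + 1
    return (in_size - effective_k) // stride + 1

# Shapes used by the generated tests (stride = dilation = 1 throughout):
assert conv2d_output_size(16, 2) == 15     # small:  1x1x16x16   * 2x2
assert conv2d_output_size(32, 3) == 30     # medium: 2x2x32x32   * 3x3
assert conv2d_output_size(128, 3) == 126   # large:  2x4x128x128 * 3x3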
From 46a462bbb209c5f73cb510d8721214e556d251fd Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 23 Oct 2024 20:28:23 -0500 Subject: [PATCH 09/16] Stop requiring exact results Signed-off-by: erman-gurses --- linalg_ops/iree-e2e-conv2d-test.cc | 2 -- linalg_ops/test_utils.c | 8 +++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/linalg_ops/iree-e2e-conv2d-test.cc b/linalg_ops/iree-e2e-conv2d-test.cc index 2d1e986..a8d2391 100644 --- a/linalg_ops/iree-e2e-conv2d-test.cc +++ b/linalg_ops/iree-e2e-conv2d-test.cc @@ -580,8 +580,6 @@ static iree_status_t check_conv2d_results_impl(FILE *file, idx, results->result_type, results->expected_contents.data); if (!iree_test_utils_result_elements_agree(actual_value, expected_value)) { - printf("HERE: actual_value: %f, actual_value: %f\n", - actual_value.f32, expected_value.f32); fprintf( file, "\n\nerror: the actual and expected result tensors disagree " diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index 659ea2c..5f5ce16 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -22,7 +22,7 @@ #include "iree/tooling/device_util.h" #include "iree/vm/api.h" -IREE_FLAG(bool, require_exact_results, true, +IREE_FLAG(bool, require_exact_results, false, "Requires floating point result elements to match exactly."); bool iree_test_utils_require_exact_results(void) { @@ -194,8 +194,10 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, // `require_exact_results` flag is set to `false`. case IREE_TEST_UTILS_VALUE_TYPE_F16: if (actual.f16 == expected.f16) return true; - return fabsf((actual.f16) - (expected.f16)) < - acceptable_fp_delta; + if (iree_test_utils_require_exact_results()) return false; + return fabsf(iree_math_f16_to_f32(actual.f16) - + iree_math_f16_to_f32(expected.f16)) < + acceptable_fp_delta; case IREE_TEST_UTILS_VALUE_TYPE_BF16: if (actual.bf16_u16 == expected.bf16_u16) return true; if (iree_test_utils_require_exact_results()) return false; From cd86b13146a94d2f3db5c2f2561c656e81a5b516 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 23 Oct 2024 20:36:46 -0500 Subject: [PATCH 10/16] Add test generation for GPU Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 74 ++++++++++++ .../convolution/generate_test_mlir_files.sh | 35 ++++++ .../conv2d_f16_nchw_f16_fchw_f32_large.mlir | 8 ++ ...v2d_f16_nchw_f16_fchw_f32_large_calls.mlir | 108 ++++++++++++++++++ .../conv2d_f16_nhwc_f16_hwcf_f32_large.mlir | 8 ++ ...v2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir | 108 ++++++++++++++++++ .../conv2d_i8_nhwc_i8_hwcf_i32_large.mlir | 8 ++ ...onv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir | 108 ++++++++++++++++++ 8 files changed, 457 insertions(+) create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index 5b5f32e..cdcf6bc 100644 ---
a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -73,3 +73,77 @@ foreach(_DTYPE IN LISTS _DTYPES) ) endforeach() endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, default flags. +# +############################################################################### + +# To distinguish between CDNA(gfx9) and RDNA3(gfx11) +if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") + +set(_SIZES) +list(APPEND _SIZES "large") + +set(_DTYPES_AND_LAYOUTS) +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32") +list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32") + + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") + +set(_SIZES) +list(APPEND _SIZES "large") + +set(_DTYPES_AND_LAYOUTS) +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +endif() diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index 9137999..f195ec4 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -76,3 +76,38 @@ for type_combination in ${type_combinations[@]}; do --shapes=${shape} done done + +shapes=( + "large" +) +# input_type;input_layout;kernel_type;kernel_layout;acc_type +type_and_layout_combinations=( + "f16;nhwc;f16;hwcf;f32" + "f16;nchw;f16;fchw;f32" + "i8;nhwc;i8;hwcf;i32" +) +for type_and_layout_combination in ${type_and_layout_combinations[@]}; do + IFS=";" read -r -a combination <<< "${type_and_layout_combination}" + input_type="${combination[0]}" + input_layout="${combination[1]}" + kernel_type="${combination[2]}" + kernel_layout="${combination[3]}" + acc_type="${combination[4]}" + type_layout_name="${input_type}_${input_layout}_${kernel_type}_${kernel_layout}_${acc_type}" + #layout_name="${input_layout}_${kernel_layout}" + type_combination_dir="${generated_dir_root}/${type_layout_name}" + mkdir -p ${type_combination_dir} + for shape in ${shapes[@]}; do + echo "Generating conv2d test files for ${type_layout_name}_${shape}" + name="conv2d_${type_layout_name}_${shape}" + python ${this_dir}/generate_e2e_conv2d_tests.py \ + --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --input_type=${input_type} \ + 
--input_layout=${input_layout} \ + --kernel_type=${kernel_type} \ + --kernel_layout=${kernel_layout} \ + --acc_type=${acc_type} \ + --shapes=${shape} + done +done diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir new file mode 100644 index 0000000..21afe9d --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> + return %result: tensor<2x8x126x126xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> + return %result: tensor<2x12x126x126xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir new file mode 100644 index 0000000..34fdff2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> 
!hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : 
i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir new file mode 100644 index 0000000..2a7b2f2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x128x128x4xf16>, %rhs: tensor<3x3x4x8xf16>, %acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf16>, tensor<3x3x4x8xf16>) outs(%acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> + return %result: tensor<2x126x126x8xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x128x128x3xf16>, %rhs: tensor<3x3x3x12xf16>, %acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf16>, tensor<3x3x3x12xf16>) outs(%acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> + return %result: tensor<2x126x126x12xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir new file mode 100644 index 0000000..cd7d928 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls 
attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 
1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir
b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir new file mode 100644 index 0000000..99911df --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%lhs: tensor<2x128x128x4xi8>, %rhs: tensor<3x3x4x8xi8>, %acc: tensor<2x126x126x8xi32>) -> tensor<2x126x126x8xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xi8>, tensor<3x3x4x8xi8>) outs(%acc: tensor<2x126x126x8xi32>) -> tensor<2x126x126x8xi32> + return %result: tensor<2x126x126x8xi32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%lhs: tensor<2x128x128x3xi8>, %rhs: tensor<3x3x3x12xi8>, %acc: tensor<2x126x126x12xi32>) -> tensor<2x126x126x12xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xi8>, tensor<3x3x3x12xi8>) outs(%acc: tensor<2x126x126x12xi32>) -> tensor<2x126x126x12xi32> + return %result: tensor<2x126x126x12xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir new file mode 100644 index 0000000..a863eca --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type<i8> : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type<i8> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call
@conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type<i32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type<i32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type<i8> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type<i8> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type<i32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1,
%acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type<i32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} From 858881346e4852ebd55b666a2b6d1e6dae7316a7 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 23 Oct 2024 20:49:33 -0500 Subject: [PATCH 11/16] Formatting Signed-off-by: erman-gurses --- linalg_ops/test_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index 5f5ce16..05065b9 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -195,8 +195,8 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, case IREE_TEST_UTILS_VALUE_TYPE_F16: if (actual.f16 == expected.f16) return true; if (iree_test_utils_require_exact_results()) return false; - return fabsf(iree_math_f16_to_f32(actual.f16) - - iree_math_f16_to_f32(expected.f16)) < + return fabsf(iree_math_f16_to_f32(actual.f16) - + iree_math_f16_to_f32(expected.f16)) < acceptable_fp_delta; case IREE_TEST_UTILS_VALUE_TYPE_BF16: if (actual.bf16_u16 == expected.bf16_u16) return true; From 427f4064ccd31fc764cd1d80fde72bc55696fccc Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Thu, 24 Oct 2024 00:35:09 -0500 Subject: [PATCH 12/16] Remove old code Signed-off-by: erman-gurses --- linalg_ops/convolution/generate_test_mlir_files.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index f195ec4..292ddf5 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -94,7 +94,6 @@ for type_and_layout_combination in ${type_and_layout_combinations[@]}; do kernel_layout="${combination[3]}" acc_type="${combination[4]}" type_layout_name="${input_type}_${input_layout}_${kernel_type}_${kernel_layout}_${acc_type}" - #layout_name="${input_layout}_${kernel_layout}" type_combination_dir="${generated_dir_root}/${type_layout_name}" mkdir -p ${type_combination_dir} for shape in ${shapes[@]}; do From 5295314e5db69ec26f623b0da592a758636ef289 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Thu, 24 Oct 2024 16:43:32
-0500 Subject: [PATCH 13/16] Add medium and small problem sizes for GPU targets Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 6 - .../convolution/generate_test_mlir_files.sh | 3 - .../conv2d_f16_nchw_f16_fchw_f32_medium.mlir | 12 ++ ...2d_f16_nchw_f16_fchw_f32_medium_calls.mlir | 158 ++++++++++++++++++ .../conv2d_f16_nchw_f16_fchw_f32_small.mlir | 12 ++ ...v2d_f16_nchw_f16_fchw_f32_small_calls.mlir | 158 ++++++++++++++++++ .../conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir | 12 ++ ...2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir | 158 ++++++++++++++++++ .../conv2d_f16_nhwc_f16_hwcf_f32_small.mlir | 12 ++ ...v2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir | 158 ++++++++++++++++++ .../conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir | 12 ++ ...nv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir | 158 ++++++++++++++++++ .../conv2d_i8_nhwc_i8_hwcf_i32_small.mlir | 12 ++ ...onv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir | 158 ++++++++++++++++++ 14 files changed, 1020 insertions(+), 9 deletions(-) create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir create mode 100644 linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir create mode 100644 linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index cdcf6bc..ed8c033 100644 --- a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -83,9 +83,6 @@ endforeach() # To distinguish between CDNA(gfx9) and RDNA3(gfx11) if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") -set(_SIZES) -list(APPEND _SIZES "large") - set(_DTYPES_AND_LAYOUTS) list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32") @@ -117,9 +114,6 @@ endforeach() elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") -set(_SIZES) -list(APPEND _SIZES "large") - set(_DTYPES_AND_LAYOUTS) list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index 292ddf5..1fabd77 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -77,9 +77,6 @@ for type_combination in ${type_combinations[@]}; do done done -shapes=( - "large" -) # input_type;input_layout;kernel_type;kernel_layout;acc_type 
type_and_layout_combinations=( "f16;nhwc;f16;hwcf;f32" diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir new file mode 100644 index 0000000..cd7d6a4 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir new file mode 100644 index 0000000..451175c --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = 
arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) ->
!hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 +
%acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir new file mode 100644 index 0000000..f2d0ea0 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> + return %result: tensor<1x1x15x15xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} diff --git
a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir new file mode 100644 index 0000000..5c160c6 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64,
i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 +
%dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir
b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir new file mode 100644 index 0000000..c77e99c --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<3x3x32x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<3x3x32x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir new file mode 100644 index 0000000..0c7afed --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + 
%input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 :
i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 32 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed =
arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir new file mode 100644 index 0000000..59e9504 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x16x16x1xf16>, %rhs: tensor<2x2x1x1xf16>, %acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf16>, tensor<2x2x1x1xf16>) outs(%acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> + return %result: tensor<1x15x15x1xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir
b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir new file mode 100644 index 0000000..6a9ab15 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call
@module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 :
i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type<f16> : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type<f16> : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type<f32> : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type<f32> : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir new file mode 100644
index 0000000..5b8985b --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x2xi8>, %acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x2xi8>) outs(%acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> + return %result: tensor<2x30x30x2xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> + return %result: tensor<2x30x30x64xi32> +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x32xi8>, %rhs: tensor<3x3x32x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xi8>, tensor<3x3x32x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> + return %result: tensor<2x30x30x64xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir new file mode 100644 index 0000000..e4c2495 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type<i8> : i32 + %input_seed = arith.constant 2 : i32 +
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type<i8> : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type<i32> : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type<i32> : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type<i8> : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type<i8> : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0,
%kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32_2_32_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 32 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, 
i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 32 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir new file mode 100644 index 0000000..5d52f93 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%lhs: tensor<1x1x1x1xi8>, %rhs: tensor<1x1x1x1xi8>, %acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xi8>, tensor<1x1x1x1xi8>) outs(%acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> + return %result: tensor<1x1x1x1xi32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%lhs: tensor<1x16x16x1xi8>, %rhs: tensor<2x2x1x1xi8>, %acc: tensor<1x15x15x1xi32>) -> tensor<1x15x15x1xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xi8>, tensor<2x2x1x1xi8>) outs(%acc: tensor<1x15x15x1xi32>) -> tensor<1x15x15x1xi32> + return %result: tensor<1x15x15x1xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x2xi8>, %acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x2xi8>) outs(%acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> + return %result: tensor<2x30x30x2xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir new file mode 100644 index 0000000..da9b803 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir @@ -0,0 +1,158 @@ 
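Each test function in these calls files generates %acc and %acc_copy from the same seed, passes the bit-identical %acc_copy to the compiled module, and hands the untouched %acc to check_conv2d_results so the runner can accumulate its reference on pristine data. A naive NumPy sketch of what that check conceptually recomputes for the NHWC x HWCF layout (assumed linalg.conv_2d_nhwc_hwcf semantics; the actual reference is the C++ in iree-e2e-conv2d-test.cc, and this helper name is hypothetical):

    import numpy as np

    # Stride-1, dilation-1, pad-0 NHWC x HWCF convolution accumulated on top
    # of the original acc tensor, widened to int64 for the i8 -> i32 case.
    def reference_conv2d_nhwc_hwcf(lhs, rhs, acc):
        n, h, w, c = lhs.shape
        kh, kw, _, f = rhs.shape
        oh, ow = h - kh + 1, w - kw + 1
        out = acc.astype(np.int64).copy()
        rhs64 = rhs.astype(np.int64)
        for b in range(n):
            for y in range(oh):
                for x in range(ow):
                    window = lhs[b, y:y + kh, x:x + kw, :].astype(np.int64)
                    # Contract the KHxKWxC window against each of the F filters.
                    out[b, y, x, :] += np.tensordot(window, rhs64, axes=3)
        return out

    out = reference_conv2d_nhwc_hwcf(np.zeros((1, 16, 16, 1), np.int8),
                                     np.zeros((2, 2, 1, 1), np.int8),
                                     np.zeros((1, 15, 15, 1), np.int32))
    assert out.shape == (1, 15, 15, 1)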
+builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : 
i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} From bb4257b34e4943f0d7bc1798b8fce2136d2c86f9 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Tue, 29 Oct 2024 17:07:22 -0500 Subject: [PATCH 14/16] Address multiple comments Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 208 ++++++++++++++++-- .../convolution/generate_e2e_conv2d_tests.py | 2 +- .../convolution/generate_test_mlir_files.sh | 66 ++---- .../conv2d_f16_nchw_f16_fchw_f16_large.mlir} | 0 ...2d_f16_nchw_f16_fchw_f16_large_calls.mlir} | 0 .../conv2d_f16_nchw_f16_fchw_f16_medium.mlir} | 4 +- 
...d_f16_nchw_f16_fchw_f16_medium_calls.mlir} | 14 +- .../conv2d_f16_nchw_f16_fchw_f16_small.mlir} | 0 ...2d_f16_nchw_f16_fchw_f16_small_calls.mlir} | 0 .../conv2d_f16_nchw_f16_fchw_f32_medium.mlir | 4 +- ...2d_f16_nchw_f16_fchw_f32_medium_calls.mlir | 14 +- .../conv2d_f16_nhwc_f16_hwcf_f16_large.mlir | 8 + ...v2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir | 108 +++++++++ .../conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir | 12 + ...2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir | 158 +++++++++++++ .../conv2d_f16_nhwc_f16_hwcf_f16_small.mlir | 12 + ...v2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir | 158 +++++++++++++ .../conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir | 4 +- ...2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir | 14 +- .../conv2d_f32_nchw_f32_fchw_f32_large.mlir} | 0 ...2d_f32_nchw_f32_fchw_f32_large_calls.mlir} | 0 .../conv2d_f32_nchw_f32_fchw_f32_medium.mlir} | 4 +- ...d_f32_nchw_f32_fchw_f32_medium_calls.mlir} | 14 +- .../conv2d_f32_nchw_f32_fchw_f32_small.mlir} | 0 ...2d_f32_nchw_f32_fchw_f32_small_calls.mlir} | 0 .../conv2d_f32_nhwc_f32_hwcf_f32_large.mlir | 8 + ...v2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir | 108 +++++++++ .../conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir | 12 + ...2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir | 158 +++++++++++++ .../conv2d_f32_nhwc_f32_hwcf_f32_small.mlir | 12 + ...v2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir | 158 +++++++++++++ .../conv2d_i8_nchw_i8_fchw_i32_large.mlir | 8 + ...onv2d_i8_nchw_i8_fchw_i32_large_calls.mlir | 108 +++++++++ .../conv2d_i8_nchw_i8_fchw_i32_medium.mlir | 12 + ...nv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir | 158 +++++++++++++ .../conv2d_i8_nchw_i8_fchw_i32_small.mlir | 12 + ...onv2d_i8_nchw_i8_fchw_i32_small_calls.mlir | 158 +++++++++++++ .../conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir | 4 +- ...nv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir | 14 +- linalg_ops/test_utils.c | 8 +- linalg_ops/test_utils.h | 1 - 41 files changed, 1627 insertions(+), 116 deletions(-) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_large.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir} (100%) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir} (100%) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_medium.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir} (86%) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir} (96%) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_small.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir} (100%) rename linalg_ops/convolution/generated/{f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir => f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir} (100%) create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir create mode 100644 
linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_large.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir} (100%) rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir} (100%) rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_medium.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir} (86%) rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir} (96%) rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_small.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir} (100%) rename linalg_ops/convolution/generated/{f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir => f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir} (100%) create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir create mode 100644 linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir create mode 100644 linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index ed8c033..73bf3cf 100644 --- a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -12,26 +12,30 @@ list(APPEND _SIZES "large") list(APPEND _SIZES "medium") list(APPEND _SIZES "small") + +set(_DTYPES_AND_LAYOUTS) +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f16") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f16") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f32_nhwc_f32_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f32_nchw_f32_fchw_f32") + ############################################################################### # # CPU - llvm-cpu on local-task, default flags. 
# ############################################################################### - -set(_DTYPES) -list(APPEND _DTYPES "f16_f16_f16") -list(APPEND _DTYPES "f32_f32_f32") - -foreach(_DTYPE IN LISTS _DTYPES) +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) foreach(_SIZE IN LISTS _SIZES) iree_test_suites_runner_test( NAME - conv2d_${_DTYPE}_${_SIZE} + conv2d_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} TESTS_SRC - "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}.mlir" + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" CALLS_SRC - "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}_calls.mlir" + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" TEST_RUNNER iree-test-suites_iree-e2e-conv2d-test TARGET_BACKEND @@ -47,15 +51,21 @@ foreach(_DTYPE IN LISTS _DTYPES) endforeach() endforeach() -foreach(_DTYPE IN LISTS _DTYPES) +############################################################################### +# +# CPU - Winograd llvm-cpu on local-task, default flags. +# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) foreach(_SIZE IN LISTS _SIZES) iree_test_suites_runner_test( NAME - conv2d_winograd_${_DTYPE}_${_SIZE} + conv2d_winograd_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} TESTS_SRC - "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}.mlir" + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" CALLS_SRC - "generated/${_DTYPE}/conv2d_${_DTYPE}_${_SIZE}_calls.mlir" + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" TEST_RUNNER iree-test-suites_iree-e2e-conv2d-test TARGET_BACKEND @@ -76,24 +86,47 @@ endforeach() ############################################################################### # -# GPU - ROCm/HIP, default flags. +# GPU - ROCm/HIP, CDNA(gfx9). # ############################################################################### # To distinguish between CDNA(gfx9) and RDNA3(gfx11) if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") -set(_DTYPES_AND_LAYOUTS) -list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") -list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32") -list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32") +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() +############################################################################### +# +# Winograd GPU - ROCm/HIP, CDNA(gfx9). 
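The nested foreach loops in this CMakeLists.txt expand a full (dtype/layout) x (size) matrix, one iree_test_suites_runner_test per cell, with the backend, driver, and optional winograd variant baked into the test name. A throwaway Python sketch of the CPU slice of that matrix (the lists mirror _DTYPES_AND_LAYOUTS and _SIZES above; not part of the build):

    dtypes_and_layouts = [
        "f16_nhwc_f16_hwcf_f16", "f16_nchw_f16_fchw_f16",
        "f16_nhwc_f16_hwcf_f32", "f16_nchw_f16_fchw_f32",
        "f32_nhwc_f32_hwcf_f32", "f32_nchw_f32_fchw_f32",
    ]
    sizes = ["large", "medium", "small"]
    for variant in ("", "winograd_"):  # plain suite, then the Winograd suite
        for dtype_and_layout in dtypes_and_layouts:
            for size in sizes:
                print(f"conv2d_{variant}llvm-cpu_local-task_{dtype_and_layout}_{size}")
    # 2 variants x 6 dtype/layout combinations x 3 sizes = 36 CPU tests.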
+# +############################################################################### foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME - conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE} + conv2d_winograd_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} TESTS_SRC "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" CALLS_SRC @@ -105,6 +138,7 @@ foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) DRIVER "hip" COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" RUNNER_FLAGS LABELS @@ -112,16 +146,148 @@ endforeach() endforeach() +############################################################################### +# +# GPU - ROCm/HIP, RDNA3(gfx11). +# +############################################################################### + elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# Winograd GPU - ROCm/HIP, RDNA3(gfx11). +# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_winograd_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +endif() + +# CPU and GPU tests for the "i8_nhwc_i8_hwcf_i32" and "i8_nchw_i8_fchw_i32" combinations without Winograd set(_DTYPES_AND_LAYOUTS) -list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32") +list(APPEND _DTYPES_AND_LAYOUTS "i8_nchw_i8_fchw_i32") + +############################################################################### +# +# CPU - llvm-cpu on local-task, default flags.
+# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + RUNNER_FLAGS + LABELS + "hostonly" + "local" + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, CDNA(gfx9). +# +############################################################################### + +# To distinguish between CDNA(gfx9) and RDNA3(gfx11) +if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, RDNA3(gfx11). +# +############################################################################### +elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) foreach(_SIZE IN LISTS _SIZES) iree_test_suites_runner_test( NAME - conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE} + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} TESTS_SRC "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" CALLS_SRC diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py index 92e3c1a..1a1d984 100644 --- a/linalg_ops/convolution/generate_e2e_conv2d_tests.py +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -120,7 +120,7 @@ def get_test_shapes(shapes_id: ShapesId): return [ TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), - TestShape(n=2, c=32, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), + TestShape(n=2, c=16, h=32, w=32, kh=3, kw=3, f=64, accumulate=True), ] if shapes_id == ShapesId.LARGE: return [ diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh index 1fabd77..69aea7d 100755 --- a/linalg_ops/convolution/generate_test_mlir_files.sh +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -13,20 +13,20 @@ # linalg_ops/ # convolution/ # generated/ -# f16_f16_f16/ -# conv2d_f16_f16_f16_large_calls.mlir -# conv2d_f16_f16_f16_large.mlir -# conv2d_f16_f16_f16_medium_calls.mlir -# conv2d_f16_f16_f16_medium.mlir -# conv2d_f16_f16_f16_small_calls.mlir -# conv2d_f16_f16_f16_small.mlir -# f32_f32_f32/ -# conv2d_f32_f32_f32_large_calls.mlir -# conv2d_f32_f32_f32_large.mlir -# conv2d_f32_f32_f32_medium_calls.mlir -# conv2d_f32_f32_f32_medium.mlir -# conv2d_f32_f32_f32_small_calls.mlir -#
conv2d_f32_f32_f32_small.mlir +# f16_nchw_f16_fchw_f16/ +# conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_large.mlir +# conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_medium.mlir +# conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_small.mlir +# f16_nchw_f16_fchw_f32/ +# conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_large.mlir +# conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_medium.mlir +# conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_small.mlir # ... # ... # Usage: @@ -47,42 +47,18 @@ shapes=( "large" ) -# input_type;kernel_type;acc_type -type_combinations=( - "f16;f16;f16" - "f32;f32;f32" -) - -for type_combination in ${type_combinations[@]}; do - IFS=";" read -r -a types <<< "${type_combination}" - input_type="${types[0]}" - kernel_type="${types[1]}" - acc_type="${types[2]}" - - type_name="${input_type}_${kernel_type}_${acc_type}" - type_combination_dir="${generated_dir_root}/${type_name}" - mkdir -p ${type_combination_dir} - - for shape in ${shapes[@]}; do - echo "Generating conv2d test files for ${type_name}_${shape}" - - name="conv2d_${type_name}_${shape}" - python ${this_dir}/generate_e2e_conv2d_tests.py \ - --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ - --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ - --input_type=${input_type} \ - --kernel_type=${kernel_type} \ - --acc_type=${acc_type} \ - --shapes=${shape} - done -done - # input_type;input_layout;kernel_type;kernel_layout;acc_type type_and_layout_combinations=( + "f16;nhwc;f16;hwcf;f16" + "f16;nchw;f16;fchw;f16" "f16;nhwc;f16;hwcf;f32" "f16;nchw;f16;fchw;f32" + "f32;nhwc;f32;hwcf;f32" + "f32;nchw;f32;fchw;f32" "i8;nhwc;i8;hwcf;i32" + "i8;nchw;i8;fchw;i32" ) + for type_and_layout_combination in ${type_and_layout_combinations[@]}; do IFS=";" read -r -a combination <<< "${type_and_layout_combination}" input_type="${combination[0]}" @@ -90,9 +66,11 @@ for type_and_layout_combination in ${type_and_layout_combinations[@]}; do kernel_type="${combination[2]}" kernel_layout="${combination[3]}" acc_type="${combination[4]}" + type_layout_name="${input_type}_${input_layout}_${kernel_type}_${kernel_layout}_${acc_type}" type_combination_dir="${generated_dir_root}/${type_layout_name}" mkdir -p ${type_combination_dir} + for shape in ${shapes[@]}; do echo "Generating conv2d test files for ${type_layout_name}_${shape}" name="conv2d_${type_layout_name}_${shape}" diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir similarity index 100% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large.mlir rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir similarity index 100% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_large_calls.mlir rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir 
b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir similarity index 86% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir index caba912..b630d29 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium.mlir +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir @@ -6,7 +6,7 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tens %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> return %result: tensor<2x64x30x30xf16> } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x16x32x32xf16>, %rhs: tensor<64x16x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf16>, tensor<64x16x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> return %result: tensor<2x64x30x30xf16> } diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir similarity index 96% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir index 60860a5..1298b5b 100644 --- a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir @@ -6,7 +6,7 @@ func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, 
%acc: !hal.buffer_view) -> !hal.buffer_view func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} @@ -106,20 +106,20 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} } { %device_index = arith.constant 0 : index %device = hal.devices.get %device_index : !hal.device %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 32 : i64 + %input_dim1 = arith.constant 16 : i64 %input_dim2 = arith.constant 32 : i64 %input_dim3 = arith.constant 32 : i64 %input_element_type = hal.element_type : i32 %input_seed = arith.constant 8 : i32 %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim1 = arith.constant 16 : i64 %kernel_dim2 = arith.constant 3 : i64 %kernel_dim3 = arith.constant 3 : i64 %kernel_element_type = hal.element_type : i32 @@ -139,9 +139,9 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16_2_32_32_3 %acc_copy_element_type = hal.element_type : i32 %acc_copy_seed = arith.constant 10 : i32 %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 + %c = arith.constant 16 : i64 %h = arith.constant 32 : i64 %w = arith.constant 32 : i64 %f = arith.constant 64 : i64 diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir similarity index 100% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small.mlir rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir diff --git a/linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir similarity index 100% rename from linalg_ops/convolution/generated/f16_f16_f16/conv2d_f16_f16_f16_small_calls.mlir 
rename to linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir index cd7d6a4..0f9d9df 100644 --- a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir @@ -6,7 +6,7 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tens %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<64x32x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<64x32x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x16x32x32xf16>, %rhs: tensor<64x16x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf16>, tensor<64x16x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir index 451175c..f8798e9 100644 --- a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir @@ -6,7 +6,7 @@ func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { iree.reflection 
= {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} @@ -106,20 +106,20 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} } { %device_index = arith.constant 0 : index %device = hal.devices.get %device_index : !hal.device %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 32 : i64 + %input_dim1 = arith.constant 16 : i64 %input_dim2 = arith.constant 32 : i64 %input_dim3 = arith.constant 32 : i64 %input_element_type = hal.element_type : i32 %input_seed = arith.constant 8 : i32 %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim1 = arith.constant 16 : i64 %kernel_dim2 = arith.constant 3 : i64 %kernel_dim3 = arith.constant 3 : i64 %kernel_element_type = hal.element_type : i32 @@ -139,9 +139,9 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_3 %acc_copy_element_type = hal.element_type : i32 %acc_copy_seed = arith.constant 10 : i32 %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 + %c = arith.constant 16 : i64 %h = arith.constant 32 : i64 %w = arith.constant 32 : i64 %f = arith.constant 64 : i64 diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir new file mode 100644 index 0000000..17eb9e8 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x128x128x4xf16>, %rhs: tensor<3x3x4x8xf16>, %acc: tensor<2x126x126x8xf16>) -> tensor<2x126x126x8xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf16>, tensor<3x3x4x8xf16>) outs(%acc: tensor<2x126x126x8xf16>) -> tensor<2x126x126x8xf16> + return %result: tensor<2x126x126x8xf16> +} +func.func 
@conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x128x128x3xf16>, %rhs: tensor<3x3x3x12xf16>, %acc: tensor<2x126x126x12xf16>) -> tensor<2x126x126x12xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf16>, tensor<3x3x3x12xf16>) outs(%acc: tensor<2x126x126x12xf16>) -> tensor<2x126x126x12xf16> + return %result: tensor<2x126x126x12xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir new file mode 100644 index 0000000..b07a2e5 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 
126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir new file mode 100644 index 0000000..addb8a2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> + return %result: tensor<2x30x30x2xf16> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x64xf16>, %acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x64xf16>) outs(%acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> + return %result: tensor<2x30x30x64xf16> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x16xf16>, %rhs: tensor<3x3x16x64xf16>, %acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf16>, tensor<3x3x16x64xf16>) outs(%acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> + return %result: tensor<2x30x30x64xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir new file mode 100644 index 0000000..17ee9c1 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private 
@module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} 
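+// Note: the summary comments here are editorial annotations, not output of
+// generate_e2e_conv2d_tests.py. Each call function in this module follows one
+// pattern: operands come from @conv2d_test.generate_random_tensor with fixed
+// seeds, and %acc / %acc_copy reuse the same seed so the checker can compare
+// against an untouched accumulator even if the callee consumes its copy.
+// The trailing scalars restate the shape (N, C, H, W, F, KH, KW), the layout
+// tag, strides, and dilations for @conv2d_test.check_conv2d_results. With no
+// padding and unit stride/dilation the output extent is OH = H - KH + 1
+// (here 32 - 3 + 1 = 30).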
+func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = 
arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir new file mode 100644 index 0000000..b3bf59e --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> + return %result: tensor<1x1x1x1xf16> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x16x16x1xf16>, %rhs: tensor<2x2x1x1xf16>, %acc: tensor<1x15x15x1xf16>) -> tensor<1x15x15x1xf16> { + %result = 
linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf16>, tensor<2x2x1x1xf16>) outs(%acc: tensor<1x15x15x1xf16>) -> tensor<1x15x15x1xf16> + return %result: tensor<1x15x15x1xf16> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> + return %result: tensor<2x30x30x2xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir new file mode 100644 index 0000000..f50584a --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = 
arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = 
arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 
+ %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir index c77e99c..d140187 100644 --- a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir @@ -6,7 +6,7 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tens %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> return %result: tensor<2x30x30x64xf32> } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x32xf16>, %rhs: tensor<3x3x32x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { - %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf16>, tensor<3x3x32x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x16xf16>, %rhs: tensor<3x3x16x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf16>, tensor<3x3x16x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> return %result: tensor<2x30x30x64xf32> } diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir index 0c7afed..548e7ad 100644 --- a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir @@ -6,7 +6,7 @@ func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view 
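+// The @module.* symbol names encode the test shape and dtypes; they must stay
+// in sync with the function definitions in conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir,
+// so the 2_32_32_32 -> 2_16_32_32 rename below mirrors the change to that file above.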
-func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} @@ -106,21 +106,21 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} } { %device_index = arith.constant 0 : index %device = hal.devices.get %device_index : !hal.device %input_dim0 = arith.constant 2 : i64 %input_dim1 = arith.constant 32 : i64 %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 %input_element_type = hal.element_type : i32 %input_seed = arith.constant 8 : i32 %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view %kernel_dim0 = arith.constant 3 : i64 %kernel_dim1 = arith.constant 3 : i64 - %kernel_dim2 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 16 : i64 %kernel_dim3 = arith.constant 64 : i64 %kernel_element_type = hal.element_type : i32 %kernel_seed = arith.constant 9 : i32 @@ -139,9 +139,9 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32_2_32_32_3 %acc_copy_element_type = hal.element_type : i32 %acc_copy_seed = arith.constant 10 : i32 %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 + %c = arith.constant 16 : i64 %h = arith.constant 32 : i64 %w = arith.constant 32 : i64 %f = arith.constant 64 : i64 diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir similarity index 100% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large.mlir rename to 
linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir similarity index 100% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_large_calls.mlir rename to linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir similarity index 86% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir rename to linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir index 97ff810..d074f1f 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium.mlir +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir @@ -6,7 +6,7 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tens %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<64x2x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x32xf32>, %rhs: tensor<64x32x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { - %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xf32>, tensor<64x32x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x16x32x32xf32>, %rhs: tensor<64x16x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf32>, tensor<64x16x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> return %result: tensor<2x64x30x30xf32> } diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir similarity index 96% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir rename to linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir index 3a2f05c..092a825 100644 --- a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_medium_calls.mlir +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir @@ -6,7 +6,7 @@ func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) func.func private 
@module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} @@ -106,20 +106,20 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_ call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} } { %device_index = arith.constant 0 : index %device = hal.devices.get %device_index : !hal.device %input_dim0 = arith.constant 2 : i64 - %input_dim1 = arith.constant 32 : i64 + %input_dim1 = arith.constant 16 : i64 %input_dim2 = arith.constant 32 : i64 %input_dim3 = arith.constant 32 : i64 %input_element_type = hal.element_type : i32 %input_seed = arith.constant 8 : i32 %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view %kernel_dim0 = arith.constant 64 : i64 - %kernel_dim1 = arith.constant 32 : i64 + %kernel_dim1 = arith.constant 16 : i64 %kernel_dim2 = arith.constant 3 : i64 %kernel_dim3 = arith.constant 3 : i64 %kernel_element_type = hal.element_type : i32 @@ -139,9 +139,9 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32_2_32_32_3 %acc_copy_element_type = hal.element_type : i32 %acc_copy_seed = arith.constant 10 : i32 %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 + %c = arith.constant 16 : i64 %h = arith.constant 32 : i64 %w = arith.constant 32 : i64 %f = arith.constant 64 : i64 diff --git 
a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir similarity index 100% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small.mlir rename to linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir diff --git a/linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir similarity index 100% rename from linalg_ops/convolution/generated/f32_f32_f32/conv2d_f32_f32_f32_small_calls.mlir rename to linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir new file mode 100644 index 0000000..0cdae51 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x128x128x4xf32>, %rhs: tensor<3x3x4x8xf32>, %acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf32>, tensor<3x3x4x8xf32>) outs(%acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> + return %result: tensor<2x126x126x8xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x128x128x3xf32>, %rhs: tensor<3x3x3x12xf32>, %acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf32>, tensor<3x3x3x12xf32>) outs(%acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> + return %result: tensor<2x126x126x12xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir new file mode 100644 index 0000000..854a307 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() 
attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, 
%input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir new file mode 100644 index 0000000..393c487 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x2xf32>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x2xf32>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x64xf32>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, 
strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x64xf32>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x16xf32>, %rhs: tensor<3x3x16x64xf32>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf32>, tensor<3x3x16x64xf32>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir new file mode 100644 index 0000000..5043f0d --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = 
hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + 
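+  // Note: %acc and %acc_copy are generated from the same seed, so they hold
+  // identical contents. The conv2d under test may overwrite the copy it is
+  // handed, so the untouched %acc is what check_conv2d_results receives as
+  // the reference accumulator.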
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = 
arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir new file mode 100644 index 0000000..ea9d92c --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x16x16x1xf32>, %rhs: tensor<2x2x1x1xf32>, %acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf32>, tensor<2x2x1x1xf32>) outs(%acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> + return %result: tensor<1x15x15x1xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x2xf32>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x2xf32>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir new file mode 100644 index 0000000..b25c720 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: 
!hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get 
%device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + 
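+  // Seeds advance monotonically across the generated cases (2/3/4, 5/6/7,
+  // and 8/9/10 here), so every input, kernel, and accumulator tensor in a
+  // suite gets distinct pseudo-random contents.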
%kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir new file mode 100644 index 0000000..706848a --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%lhs: tensor<2x4x128x128xi8>, %rhs: tensor<8x4x3x3xi8>, %acc: tensor<2x8x126x126xi32>) -> tensor<2x8x126x126xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xi8>, tensor<8x4x3x3xi8>) outs(%acc: tensor<2x8x126x126xi32>) -> tensor<2x8x126x126xi32> + return %result: tensor<2x8x126x126xi32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%lhs: tensor<2x3x128x128xi8>, %rhs: tensor<12x3x3x3xi8>, %acc: tensor<2x12x126x126xi32>) -> tensor<2x12x126x126xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xi8>, tensor<12x3x3x3xi8>) outs(%acc: tensor<2x12x126x126xi32>) -> tensor<2x12x126x126xi32> + return %result: tensor<2x12x126x126xi32> +} 
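Every accumulator shape in these generated files follows the unpadded convolution output-size rule. A minimal sketch of that arithmetic, for reference when reading the dimensions above (the helper name is illustrative, not part of the generator or test runner):

// Output spatial size of an unpadded ("VALID") 2-D convolution dimension.
static int conv_out_dim(int in_dim, int k_dim, int stride, int dilation) {
  int effective_k = (k_dim - 1) * dilation + 1;
  return (in_dim - effective_k) / stride + 1;
}
// With stride = dilation = 1: 128 -> 126 for the 3x3 "large" cases,
// 32 -> 30 for "medium", and 16 -> 15 for the 2x2 "small" cases.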
diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir new file mode 100644 index 0000000..af106a6 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : 
(!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, 
%f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir new file mode 100644 index 0000000..780c670 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<2x2x3x3xi8>, %acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xi8>, tensor<2x2x3x3xi8>) outs(%acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> + return %result: tensor<2x2x30x30xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<64x2x3x3xi8>, %acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xi8>, tensor<64x2x3x3xi8>) outs(%acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> + return %result: tensor<2x64x30x30xi32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x16x32x32xi8>, %rhs: tensor<64x16x3x3xi8>, %acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xi8>, tensor<64x16x3x3xi8>) outs(%acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> + return %result: tensor<2x64x30x30xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir new file mode 100644 index 0000000..8c4dc85 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = 
call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 16 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, 
%kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir new file mode 100644 index 0000000..8acc310 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%lhs: tensor<1x1x1x1xi8>, %rhs: tensor<1x1x1x1xi8>, %acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xi8>, tensor<1x1x1x1xi8>) outs(%acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> + return %result: tensor<1x1x1x1xi32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%lhs: tensor<1x1x16x16xi8>, %rhs: tensor<1x1x2x2xi8>, %acc: tensor<1x1x15x15xi32>) -> tensor<1x1x15x15xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xi8>, tensor<1x1x2x2xi8>) outs(%acc: tensor<1x1x15x15xi32>) -> tensor<1x1x15x15xi32> + return %result: tensor<1x1x15x15xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<2x2x3x3xi8>, %acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: 
tensor<2x2x32x32xi8>, tensor<2x2x3x3xi8>) outs(%acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> + return %result: tensor<2x2x30x30xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir new file mode 100644 index 0000000..6f68fe9 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call 
@conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 
1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} 
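Across the generated suites, the %layout operand passed to conv2d_test.check_conv2d_results tracks each case's tensor layout: the nchw/fchw files above pass 0 while the nhwc/hwcf files pass 1. A sketch of that mapping as observed in the generated calls (the enum name is illustrative, not the test runner's actual identifier):

// Layout codes as they appear in the generated *_calls.mlir files.
enum conv2d_test_layout {
  CONV2D_TEST_LAYOUT_NCHW_FCHW = 0,  // input NxCxHxW, kernel FxCxKHxKW
  CONV2D_TEST_LAYOUT_NHWC_HWCF = 1,  // input NxHxWxC, kernel KHxKWxCxF
};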
diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir index 5b8985b..e64bc66 100644 --- a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir @@ -6,7 +6,7 @@ func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> return %result: tensor<2x30x30x64xi32> } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x32xi8>, %rhs: tensor<3x3x32x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { - %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x32xi8>, tensor<3x3x32x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x16xi8>, %rhs: tensor<3x3x16x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xi8>, tensor<3x3x16x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> return %result: tensor<2x30x30x64xi32> } diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir index e4c2495..ea12edb 100644 --- a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir @@ -6,7 +6,7 @@ func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view -func.func private @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_0() attributes { iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} @@ -106,21 +106,21 @@ func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32_2_2_32_32_64 call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () return } -func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32_2_32_32_32_64_3_3_acc_2() attributes { - iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x32x32x32x64x3x3"} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} } { %device_index = arith.constant 0 : index %device = hal.devices.get %device_index : !hal.device %input_dim0 = arith.constant 2 : i64 %input_dim1 = arith.constant 32 : i64 %input_dim2 = arith.constant 32 : i64 - %input_dim3 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 %input_element_type = hal.element_type : i32 %input_seed = arith.constant 8 : i32 %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view %kernel_dim0 = arith.constant 3 : i64 %kernel_dim1 = arith.constant 3 : i64 - %kernel_dim2 = arith.constant 32 : i64 + %kernel_dim2 = arith.constant 16 : i64 %kernel_dim3 = arith.constant 64 : i64 %kernel_element_type = hal.element_type : i32 %kernel_seed = arith.constant 9 : i32 @@ -139,9 +139,9 @@ func.func @conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32_2_32_32_32_ %acc_copy_element_type = hal.element_type : i32 %acc_copy_seed = arith.constant 10 : i32 %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view - %result = call @module.conv2d_accumulate_2_32_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view %n = arith.constant 2 : i64 - %c = arith.constant 32 : i64 + %c = arith.constant 16 : i64 %h = arith.constant 32 : i64 %w = arith.constant 32 : i64 %f = arith.constant 64 : i64 diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index 05065b9..e86702b 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -22,7 +22,7 @@ #include "iree/tooling/device_util.h" #include "iree/vm/api.h" -IREE_FLAG(bool, require_exact_results, false, +IREE_FLAG(bool, require_exact_results, true, "Requires floating point result elements to match exactly."); bool iree_test_utils_require_exact_results(void) { @@ -193,10 +193,10 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, // expected values. Inexact results are only permitted when the // `require_exact_results` flag is set to `false`. 
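  // (After this patch series the flag defaults to true, so the tolerance-based
  // comparison below must be opted into with require_exact_results=false.)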
case IREE_TEST_UTILS_VALUE_TYPE_F16: - if (actual.f16 == expected.f16) return true; + if (actual.f16_u16 == expected.f16_u16) return true; if (iree_test_utils_require_exact_results()) return false; - return fabsf(iree_math_f16_to_f32(actual.f16) - - iree_math_f16_to_f32(expected.f16)) < + return fabsf(iree_math_f16_to_f32(actual.f16_u16) - + iree_math_f16_to_f32(expected.f16_u16)) < acceptable_fp_delta; case IREE_TEST_UTILS_VALUE_TYPE_BF16: if (actual.bf16_u16 == expected.bf16_u16) return true; diff --git a/linalg_ops/test_utils.h b/linalg_ops/test_utils.h index f86986b..626097b 100644 --- a/linalg_ops/test_utils.h +++ b/linalg_ops/test_utils.h @@ -62,7 +62,6 @@ typedef struct iree_test_utils_value_t { int16_t i16; int32_t i32; int64_t i64; - float f16; float f32; uint16_t f16_u16; uint16_t bf16_u16; From d00fedd328382bcb00b057e2f57b8391817b0594 Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 30 Oct 2024 12:18:37 -0500 Subject: [PATCH 15/16] Remove outdated comments Signed-off-by: erman-gurses --- linalg_ops/convolution/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt index 73bf3cf..460cb2b 100644 --- a/linalg_ops/convolution/CMakeLists.txt +++ b/linalg_ops/convolution/CMakeLists.txt @@ -209,7 +209,7 @@ endforeach() endif() -# CPU and GPU tests for "i8_nhwc_i8_hwcf_i32" combination without Winograd +# CPU and GPU tests without Winograd set(_DTYPES_AND_LAYOUTS) list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32") list(APPEND _DTYPES_AND_LAYOUTS "i8_nchw_i8_fchw_i32") From cf73da6af11f5646926f23a4c9a8a5703dde171a Mon Sep 17 00:00:00 2001 From: erman-gurses Date: Wed, 30 Oct 2024 12:29:44 -0500 Subject: [PATCH 16/16] Formatting Signed-off-by: erman-gurses --- linalg_ops/test_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index e86702b..9762861 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -196,8 +196,8 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, if (actual.f16_u16 == expected.f16_u16) return true; if (iree_test_utils_require_exact_results()) return false; return fabsf(iree_math_f16_to_f32(actual.f16_u16) - - iree_math_f16_to_f32(expected.f16_u16)) < - acceptable_fp_delta; + iree_math_f16_to_f32(expected.f16_u16)) < + acceptable_fp_delta; case IREE_TEST_UTILS_VALUE_TYPE_BF16: if (actual.bf16_u16 == expected.bf16_u16) return true; if (iree_test_utils_require_exact_results()) return false;
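Taken together, the test_utils changes above route half-precision values through the utilities as raw bit patterns. A condensed sketch of the resulting f16 comparison path (f16_elements_agree is an illustrative stand-in for the F16 case of iree_test_utils_result_elements_agree; the iree_math_f16_to_f32 declaration mirrors the IREE helper used in the diff):

#include <math.h>
#include <stdbool.h>
#include <stdint.h>

// Mirrors IREE's half-to-float conversion helper referenced in test_utils.c.
float iree_math_f16_to_f32(uint16_t f16_value);

static bool f16_elements_agree(uint16_t actual_u16, uint16_t expected_u16,
                               bool require_exact, float acceptable_fp_delta) {
  // Exact bit-pattern match first: cheap, and exact across all payloads.
  if (actual_u16 == expected_u16) return true;
  // With require_exact_results=true (the new default) nothing else passes.
  if (require_exact) return false;
  // Otherwise compare in f32 with an absolute tolerance.
  return fabsf(iree_math_f16_to_f32(actual_u16) -
               iree_math_f16_to_f32(expected_u16)) < acceptable_fp_delta;
}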