diff --git a/linalg_ops/CMakeLists.txt b/linalg_ops/CMakeLists.txt index 08cf318..015b318 100644 --- a/linalg_ops/CMakeLists.txt +++ b/linalg_ops/CMakeLists.txt @@ -114,6 +114,26 @@ iree_cc_binary( iree::vm::cc ) +iree_cc_binary( + NAME + iree-e2e-conv2d-test + SRCS + "iree-e2e-conv2d-test.cc" + DEPS + ::test_utils + iree::base + iree::base::internal + iree::base::internal::cpu + iree::base::internal::flags + iree::base::internal::path + iree::hal + iree::modules::hal + iree::tooling::context_util + iree::tooling::device_util + iree::vm + iree::vm::cc +) + #------------------------------------------------------------------------------- # Tests #------------------------------------------------------------------------------- @@ -123,3 +143,4 @@ include(iree_test_suites_native_test) include(iree_test_suites_runner_test) add_subdirectory(matmul) +add_subdirectory(convolution) diff --git a/linalg_ops/convolution/CMakeLists.txt b/linalg_ops/convolution/CMakeLists.txt new file mode 100644 index 0000000..460cb2b --- /dev/null +++ b/linalg_ops/convolution/CMakeLists.txt @@ -0,0 +1,309 @@ +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# TODO(scotttodd): add filtering here, in the helper functions, or in ctest to +# choose which tests to compile and run + +set(_SIZES) +list(APPEND _SIZES "large") +list(APPEND _SIZES "medium") +list(APPEND _SIZES "small") + + +set(_DTYPES_AND_LAYOUTS) +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f16") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f16") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f32_nhwc_f32_hwcf_f32") +list(APPEND _DTYPES_AND_LAYOUTS "f32_nchw_f32_fchw_f32") + +############################################################################### +# +# CPU - llvm-cpu on local-task, default flags. +# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + RUNNER_FLAGS + LABELS + "hostonly" + "local" + ) + endforeach() +endforeach() + +############################################################################### +# +# CPU - Winograd llvm-cpu on local-task, default flags. 
+# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_winograd_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" + RUNNER_FLAGS + LABELS + "hostonly" + "local" + TARGET_CPU_FEATURES_VARIANTS + "default" + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, CDNA(gfx9). +# +############################################################################### + +# To distinguish between CDNA(gfx9) and RDNA3(gfx11) +if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# Winograd GPU - ROCm/HIP, CDNA(gfx9). 
+# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_winograd_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, CDNA(gfx11) +# +############################################################################### + +elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# Winograd GPU - ROCm/HIP, CDNA(gfx11). 
+# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_winograd_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +endif() + +# CPU and GPU tests for without Winograd +set(_DTYPES_AND_LAYOUTS) +list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32") +list(APPEND _DTYPES_AND_LAYOUTS "i8_nchw_i8_fchw_i32") + +############################################################################### +# +# CPU - llvm-cpu on local-task, default flags. +# +############################################################################### + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_llvm-cpu_local-task_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "llvm-cpu" + DRIVER + "local-task" + COMPILER_FLAGS + RUNNER_FLAGS + LABELS + "hostonly" + "local" + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, CDNA(gfx9). 
+# +############################################################################### + +# To distinguish between CDNA(gfx9) and RDNA3(gfx11) +if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +############################################################################### +# +# GPU - ROCm/HIP, CDNA(gfx11) +# +############################################################################### +elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") + +foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS) + foreach(_SIZE IN LISTS _SIZES) + iree_test_suites_runner_test( + NAME + conv2d_rocm_hip_${_DTYPE_AND_LAYOUT}_${_SIZE} + TESTS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir" + CALLS_SRC + "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir" + TEST_RUNNER + iree-test-suites_iree-e2e-conv2d-test + TARGET_BACKEND + "rocm" + DRIVER + "hip" + COMPILER_FLAGS + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" + RUNNER_FLAGS + LABELS + ) + endforeach() +endforeach() + +endif() diff --git a/linalg_ops/convolution/generate_e2e_conv2d_tests.py b/linalg_ops/convolution/generate_e2e_conv2d_tests.py new file mode 100644 index 0000000..1a1d984 --- /dev/null +++ b/linalg_ops/convolution/generate_e2e_conv2d_tests.py @@ -0,0 +1,699 @@ +#!/usr/bin/env python3 +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. 
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Generator for e2e conv2d tests.
"""

from typing import Optional
import argparse
import enum
import dataclasses
import typing
import math
import itertools
import re


# Data type of kernel entries. The string values must match MLIR data types.
@enum.unique
class KernelElemTypeId(enum.Enum):
    NONE = ""
    I8 = "i8"
    F16 = "f16"
    F32 = "f32"


# Data type of input entries. The string values must match MLIR data types.
@enum.unique
class InputElemTypeId(enum.Enum):
    NONE = ""
    I8 = "i8"
    F16 = "f16"
    F32 = "f32"


# Data type of accumulator/result entries. The string values must match MLIR
# data types.
@enum.unique
class AccElemTypeId(enum.Enum):
    NONE = ""
    F16 = "f16"
    I32 = "i32"
    F32 = "f32"


# Enumerates of the collections of shapes that we can generate tests for.
# The values are the accepted values for the --shapes= flag.
@enum.unique
class ShapesId(enum.Enum):
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"


# Enumerates ways to construct MLIR tensor types.
# TODO: Enable dynamic dimensions once the tests start passing.
@enum.unique
class Dynamicity(enum.Enum):
    DYNAMIC = "dynamic"  # Use '?' everywhere. Example: tensor<?x?xf32>.
    STATIC = "static"  # Use fixed values everywhere. Example: tensor<4x6xf32>.
    MIXED = "mixed"  # Randomly mix '?' and values. Example: tensor<?x4xf32>.


# TODO: Add more input layouts as needed. The layout determines the dim of input and kernel.
@enum.unique
class InputLayout(enum.Enum):
    NCHW = "nchw"
    NHWC = "nhwc"


# TODO: Add more kernel layouts as needed.
@enum.unique
class KernelLayout(enum.Enum):
    FCHW = "fchw"
    HWCF = "hwcf"


# Describes the shape of a tensor conv2d in the usual convention:
# the input is {n}x{c}x{h}x{w}, the kernel is {f}x{c}x{kh}x{kw}, the accumulator/result is
# {n}x{f}x{oh}x{ow}.
# The extra `accumulate` boolean tells whether the conv2d is accumulating into
# an existing accumulator (C += A * B) or just overwriting the result
# (C = A * B).
@dataclasses.dataclass
class TestShape:
    n: int
    c: int
    h: int
    w: int
    kh: int
    kw: int
    f: int
    accumulate: bool


# Attributes for the linalg conv2d operation: (height, width) stride and
# dilation factors.
@dataclasses.dataclass
class ConvAttrs:
    STRIDE: typing.Tuple[int, int] = (1, 1)
    DILATION: typing.Tuple[int, int] = (1, 1)


# Returns the list of TestShape's to use for the collection of shapes
# identified by shapes_id. Raises ValueError for an unknown shapes_id.
def get_test_shapes(shapes_id: ShapesId):
    # Notes:
    # 1. Be conservative in adding more shapes, as that can increase both the
    #    build and execution latency of tests. The build latency is nearly the
    #    same for all shapes, while execution latency grows linearly with
    #    n*f*ow*oh*kh*kw.

    if shapes_id == ShapesId.SMALL:
        return [
            TestShape(n=1, c=1, h=1, w=1, kh=1, kw=1, f=1, accumulate=True),
            TestShape(n=1, c=1, h=16, w=16, kh=2, kw=2, f=1, accumulate=True),
            TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True),
        ]
    if shapes_id == ShapesId.MEDIUM:
        return [
            TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True),
            TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=64, accumulate=True),
            TestShape(n=2, c=16, h=32, w=32, kh=3, kw=3, f=64, accumulate=True),
        ]
    if shapes_id == ShapesId.LARGE:
        return [
            TestShape(n=2, c=4, h=128, w=128, kh=3, kw=3, f=8, accumulate=True),
            TestShape(n=2, c=3, h=128, w=128, kh=3, kw=3, f=12, accumulate=True),
        ]

    raise ValueError(shapes_id)


# A shape dimension value, i.e. a size value that could appear in a MLIR type
# such as 'tensor<?x4xf32>'. None means a dynamic size, similar to '?' in MLIR.
@dataclasses.dataclass
class DimSize:
    value: typing.Optional[int]


# Generates a compile-time MLIR size value, i.e. either a fixed positive integer
# or None (which maps to MLIR '?') depending on dynamicity.
# Only DYNAMIC and STATIC are handled; any other dynamicity (including MIXED)
# raises ValueError.
def shape_dim(x: int, dynamicity: Dynamicity):
    if dynamicity == Dynamicity.DYNAMIC:
        return DimSize(None)
    elif dynamicity == Dynamicity.STATIC:
        return DimSize(x)
    else:
        raise ValueError(dynamicity)


# Stringification used for generating MLIR types, e.g. tensor<?x?xf32>.
# Only None means "dynamic"; a static size of 0 is returned as 0, not "?".
def int_or_question_mark(s: DimSize):
    return "?" if s.value is None else s.value


# Stringification used for generating alphanumeric identifiers, e.g.
# func.func @somefunction_DYNxDYNxf32, where we can't use "?" characters.
# Only None means "dynamic"; a static size of 0 is returned as 0, not "DYN".
def int_or_DYN(s: DimSize):
    return "DYN" if s.value is None else s.value


# Determines the shape of input and kernel tensors.
@dataclasses.dataclass
class TestInputTensorShapes:
    n: DimSize
    c: DimSize
    h: DimSize
    w: DimSize
    kh: DimSize
    kw: DimSize
    f: DimSize


# Helper for generate_function. Generates TestInputTensorShapes, i.e.
# converts from the runtime shape dimensions in TestShape and given dynamicity to
# the set of shapes to be used in a test function's input tensors.
def generate_shapes(shape: TestShape, dynamicity: Dynamicity):
    return TestInputTensorShapes(
        n=shape_dim(shape.n, dynamicity),
        c=shape_dim(shape.c, dynamicity),
        h=shape_dim(shape.h, dynamicity),
        w=shape_dim(shape.w, dynamicity),
        kh=shape_dim(shape.kh, dynamicity),
        kw=shape_dim(shape.kw, dynamicity),
        f=shape_dim(shape.f, dynamicity),
    )


# Helper to calculate the output size along one spatial dimension from the
# input size, kernel size, dilation and stride (no padding):
#   floor((i - k - (k - 1) * (d - 1)) / s) + 1
def calc_out_shape(i_shape: int, k_shape: int, dilation_val: int, stride_val: int):
    # Extra extent the kernel covers because of the gaps between dilated taps.
    dilation_extent = (k_shape - 1) * (dilation_val - 1)
    remaining = i_shape - k_shape - dilation_extent
    # Integer floor division keeps the arithmetic exact (no float round-trip).
    return remaining // stride_val + 1


# Helper to return input, kernel and output shapes based on the layout and ConvAttrs.
def get_tensor_shape(
    shapes: TestShape,
    kernel_layout: KernelLayout,
    input_layout: InputLayout,
    conv_attr: ConvAttrs,
):
    """Returns (input, kernel, output) dimension lists for one conv2d test.

    The output height/width are derived with calc_out_shape from the input
    size, kernel size and the per-axis dilation/stride in conv_attr. The
    ordering of each returned list follows the requested layouts:
      * NCHW input -> [n, c, h, w] and output [n, f, oh, ow]
      * NHWC input -> [n, h, w, c] and output [n, oh, ow, f]
      * FCHW kernel -> [f, c, kh, kw]
      * HWCF kernel -> [kh, kw, c, f]

    Raises ValueError carrying the offending layout when input_layout or
    kernel_layout is not one of the layouts above (the input layout is
    checked first).
    """
    batch, channels, filters = shapes.n, shapes.c, shapes.f
    in_h, in_w = shapes.h, shapes.w
    k_h, k_w = shapes.kh, shapes.kw

    # Index 0 of DILATION/STRIDE applies to height, index 1 to width.
    out_h = calc_out_shape(in_h, k_h, conv_attr.DILATION[0], conv_attr.STRIDE[0])
    out_w = calc_out_shape(in_w, k_w, conv_attr.DILATION[1], conv_attr.STRIDE[1])

    channels_first = input_layout == InputLayout.NCHW
    if not (channels_first or input_layout == InputLayout.NHWC):
        raise ValueError(input_layout)

    filters_first = kernel_layout == KernelLayout.FCHW
    if not (filters_first or kernel_layout == KernelLayout.HWCF):
        raise ValueError(kernel_layout)

    if channels_first:
        input_dims = [batch, channels, in_h, in_w]
        output_dims = [batch, filters, out_h, out_w]
    else:
        input_dims = [batch, in_h, in_w, channels]
        output_dims = [batch, out_h, out_w, filters]

    if filters_first:
        kernel_dims = [filters, channels, k_h, k_w]
    else:
        kernel_dims = [k_h, k_w, channels, filters]

    return input_dims, kernel_dims, output_dims


# Helper for generate_function.
# Generates a name for a test function in the generated MLIR code.
def generate_function_name(
    input_type: InputElemTypeId,
    kernel_type: KernelElemTypeId,
    output_type: AccElemTypeId,
    shapes: TestInputTensorShapes,
    accumulate: bool,
):
    """Builds the MLIR symbol name for one generated conv2d test function.

    The name has the form
      <kind>_<n>_<c>_<h>_<w>_times_<kh>_<kw>_<f>_dtype_<in>_<kernel>_<acc>
    where <kind> is "conv2d_accumulate" when accumulate is set and "conv2d"
    otherwise, and each dimension is rendered through int_or_DYN.
    """
    prefix = "conv2d_accumulate" if accumulate else "conv2d"

    input_dims = "_".join(
        str(int_or_DYN(dim)) for dim in (shapes.n, shapes.c, shapes.h, shapes.w)
    )
    kernel_dims = "_".join(
        str(int_or_DYN(dim)) for dim in (shapes.kh, shapes.kw, shapes.f)
    )
    dtypes = f"{input_type.value}_{kernel_type.value}_{output_type.value}"

    return f"{prefix}_{input_dims}_times_{kernel_dims}_dtype_{dtypes}"


# Represents a generated test function: its symbol name, its tensor-level
# signature string, the import declaration emitted into the calls module, and
# the full func.func definition text.
@dataclasses.dataclass
class MLIRFunction:
    name: str
    signature: str
    import_declaration: str
    definition: str


# Generates a test function in the generated MLIR code.
# The generated function will take the same arguments as linalg.conv2d variants
# and will just call linalg.conv2d variants with them, returning its result.
def generate_function(
    input_type: InputElemTypeId,
    input_layout: InputLayout,
    kernel_type: KernelElemTypeId,
    kernel_layout: KernelLayout,
    acc_type: AccElemTypeId,
    conv2d_attr: ConvAttrs,
    shape: TestShape,
    dynamicity: Dynamicity,
):
    """Emits one MLIR test function wrapping a linalg conv2d named op.

    The function name is derived from the (possibly dynamic) shapes produced
    by generate_shapes, while the tensor types in the signature are built from
    the concrete sizes returned by get_tensor_shape.

    Returns an MLIRFunction. Raises ValueError when no op name is defined for
    the (input_layout, kernel_layout) pair.
    """
    name_shapes = generate_shapes(shape, dynamicity)
    func_name = generate_function_name(
        input_type, kernel_type, acc_type, name_shapes, shape.accumulate
    )

    in_dims, k_dims, out_dims = get_tensor_shape(
        shape, kernel_layout, input_layout, conv2d_attr
    )

    def tensor_type(dims, elem_type):
        # All tensors handled here are rank 4.
        return f"tensor<{dims[0]}x{dims[1]}x{dims[2]}x{dims[3]}x{elem_type.value}>"

    input_tensor_type = tensor_type(in_dims, input_type)
    kernel_tensor_type = tensor_type(k_dims, kernel_type)
    acc_tensor_type = tensor_type(out_dims, acc_type)

    # Layout pairs with an op name; anything else is rejected below.
    op_by_layouts = {
        (InputLayout.NCHW, KernelLayout.FCHW): "linalg.conv_2d_nchw_fchw",
        (InputLayout.NCHW, KernelLayout.HWCF): "linalg.conv_2d_nchw_hwcf",
        (InputLayout.NHWC, KernelLayout.HWCF): "linalg.conv_2d_nhwc_hwcf",
    }
    op_name = op_by_layouts.get((input_layout, kernel_layout))
    if op_name is None:
        raise ValueError("Invalid combination of input_layout and kernel_layout")

    dilations = list(conv2d_attr.DILATION)
    strides = list(conv2d_attr.STRIDE)
    conv_attr = f"{{dilations = dense<{dilations}> : tensor<2xi64>, strides = dense<{strides}> : tensor<2xi64>}}"

    signature = f"({input_tensor_type}, {kernel_tensor_type}, {acc_tensor_type}) -> {acc_tensor_type}"
    import_declaration = f"func.func private @module.{func_name}(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view"
    definition_lines = [
        f"func.func @{func_name}(%lhs: {input_tensor_type}, %rhs: {kernel_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n",
        f" %result = {op_name} {conv_attr} ins(%lhs, %rhs: {input_tensor_type}, {kernel_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n",
        f" return %result: {acc_tensor_type}\n",
        f"}}",
    ]

    return MLIRFunction(
        name=func_name,
        signature=signature,
        import_declaration=import_declaration,
        definition="".join(definition_lines),
    )


# Represents a call to a generated test function: the function being called
# and the MLIR text of the calling test case.
@dataclasses.dataclass
class TestCall:
    function: MLIRFunction
    op: str


# Enumerates ways to initialize tensor buffer contents.
@enum.unique
class TensorGenerator(enum.Enum):
    ZERO = "zero"  # Fill with zeros
    RANDOM = "random"  # Fill with (deterministic) pseudorandom values.


# Intentionally fixed seed! We want full reproducibility here, both across runs
# and across machines.
# Intentionally not shared with local_pseudorandom_state to limit the ways
# in which shuffling testcases changes which random values are generated.
pseudorandom_generator_seed = 1


# Generate a 4d tensor function argument of the given size as `%name`.
+def generate_random_4d_tensor( + name: str, + tensor_shape: list, + element_type: typing.Union[InputElemTypeId, KernelElemTypeId], +): + global pseudorandom_generator_seed + pseudorandom_generator_seed = pseudorandom_generator_seed + 1 + return ( + f" %{name}_dim0 = arith.constant {tensor_shape[0]} : i64\n" + f" %{name}_dim1 = arith.constant {tensor_shape[1]} : i64\n" + f" %{name}_dim2 = arith.constant {tensor_shape[2]} : i64\n" + f" %{name}_dim3 = arith.constant {tensor_shape[3]} : i64\n" + f" %{name}_element_type = hal.element_type<{element_type.value}> : i32\n" + f" %{name}_seed = arith.constant {pseudorandom_generator_seed} : i32\n" + f" %{name} = call @conv2d_test.generate_random_tensor(%device, %{name}_dim0, %{name}_dim1, %{name}_dim2, %{name}_dim3, %{name}_element_type, %{name}_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view\n" + ) + + +call_id = 0 + + +def generate_call( + function: MLIRFunction, + input_type: InputElemTypeId, + input_layout: InputLayout, + kernel_type: KernelElemTypeId, + kernel_layout: KernelLayout, + conv2d_attr: ConvAttrs, + acc_type: AccElemTypeId, + shape: TestShape, +): + global call_id + func_name = f"{function.name}_{shape.n}_{shape.c}_{shape.h}_{shape.w}_{shape.f}_{shape.kh}_{shape.kw}" + if shape.accumulate: + func_name = f"{func_name}_acc" + func_name = f"{func_name}_{call_id}" + call_id = call_id + 1 + + # layout of output tensor for checking correctness + layout = -1 + + if input_layout == InputLayout.NCHW: + if kernel_layout == KernelLayout.FCHW or kernel_layout == KernelLayout.HWCF: + layout = 0 # for output tensor NxFxOHxOW + else: + raise ValueError(kernel_layout) + elif input_layout == InputLayout.NHWC: + if kernel_layout == KernelLayout.HWCF: + layout = 1 # for output tensor NxOHxOWxF + else: + raise ValueError(kernel_layout) + else: + raise ValueError(InputLayout) + + description = f"Conv2d shape (NxCxHxWxFxKHxKW): {shape.n}x{shape.c}x{shape.h}x{shape.w}x{shape.f}x{shape.kh}x{shape.kw}" + op = ( 
+ f"func.func @{func_name}() attributes {{\n" + f' iree.reflection = {{description = "{description}"}}\n' + "} {\n" + " %device_index = arith.constant 0 : index\n" + " %device = hal.devices.get %device_index : !hal.device\n" + ) + + inp_shape, kernel_shape, out_shape = get_tensor_shape( + shape, + kernel_layout, + input_layout, + conv2d_attr, + ) + + op = op + generate_random_4d_tensor("input", inp_shape, input_type) + op = op + generate_random_4d_tensor("kernel", kernel_shape, kernel_type) + if shape.accumulate: + op = op + generate_random_4d_tensor("acc", out_shape, acc_type) + # TODO(#16168): there's a bug with in-place input->output aliasing and + # we work around it here by passing in a unique copy. + global pseudorandom_generator_seed + pseudorandom_generator_seed = pseudorandom_generator_seed - 1 + op = op + generate_random_4d_tensor("acc_copy", out_shape, acc_type) + op = op + ( + f" %result = call @module.{function.name}(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" + ) + else: + op = op + ( + f" %acc = util.null : !hal.buffer_view\n" + f" %result = call @module.{function.name}(%input, %kernel) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" + ) + + op = op + ( + f" %n = arith.constant {shape.n} : i64\n" + f" %c = arith.constant {shape.c} : i64\n" + f" %h = arith.constant {shape.h} : i64\n" + f" %w = arith.constant {shape.w} : i64\n" + f" %f = arith.constant {shape.f} : i64\n" + f" %kh = arith.constant {shape.kh} : i64\n" + f" %kw = arith.constant {shape.kw} : i64\n" + f" %layout = arith.constant {layout} : i64\n" + f" %sh = arith.constant {conv2d_attr.STRIDE[0]} : i64\n" + f" %sw = arith.constant {conv2d_attr.STRIDE[1]} : i64\n" + f" %dh = arith.constant {conv2d_attr.DILATION[0]} : i64\n" + f" %dw = arith.constant {conv2d_attr.DILATION[1]} : i64\n" + f" call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, 
%result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n" + ) + + op = op + " return\n" + op = op + "}\n" + + return TestCall(function=function, op=op) + + +# Generates all output files' contents as strings. +def generate( + input_elem_type: InputElemTypeId, + input_layout: InputLayout, + kernel_elem_type: KernelElemTypeId, + kernel_layout: KernelLayout, + conv2d_attr: ConvAttrs, + acc_type: AccElemTypeId, + shapes_id: ShapesId, +): + functions = {} + calls = [] + + for shape in get_test_shapes(shapes_id): + for dynamicity in [Dynamicity.STATIC]: + function = generate_function( + input_elem_type, + input_layout, + kernel_elem_type, + kernel_layout, + acc_type, + conv2d_attr, + shape, + dynamicity, + ) + # Different testcases may differ only by runtime parameters but + # share the same code. For example, dynamic-shapes testcases + # share the same code involing tensor even though the runtime + # value in the trace are different. That's why we append conditionally + # to calls, but unconditionally to function_definitions. 
+ if function.name not in functions: + functions[function.name] = function + calls.append( + generate_call( + function, + input_elem_type, + input_layout, + kernel_elem_type, + kernel_layout, + conv2d_attr, + acc_type, + shape, + ) + ) + + return (functions, calls) + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Generator of e2e conv2d tests") + parser.add_argument( + "--output_conv2d_mlir", + type=str, + help="Path of output .mlir file containing the generated conv2d functions", + required=True, + ) + parser.add_argument( + "--output_calls_mlir", + type=str, + help="Path of output .mlir file containing the calls", + required=True, + ) + parser.add_argument( + "--input_type", + type=str, + choices=["i8", "f32", "f16"], + help="Numeric type of input tensors", + required=True, + ) + parser.add_argument( + "--input_layout", + type=str, + default="nchw", + choices=["nchw", "nhwc"], + help="Layout of the input tensor. Currently, only nchw is supported.", + required=False, + ) + parser.add_argument( + "--kernel_type", + type=str, + choices=["i8", "f32", "f16"], + help="Numeric type of input tensors", + required=True, + ) + parser.add_argument( + "--kernel_layout", + type=str, + default="fchw", + choices=["fchw", "hwcf"], + help="Layout of kernel tensor. Currently, only fchw is supported.", + required=False, + ) + parser.add_argument( + "--acc_type", + type=str, + choices=["i32", "f32", "f16"], + help="Numeric type of input tensors", + default="", + required=False, + ) + parser.add_argument( + "--shapes", + type=str, + choices=[s.value for s in ShapesId], + help="Collection of tensor shapes to test", + required=True, + ) + parser.add_argument( + "--dilation", + type=str, + default="1,1", + help="The dilation factor for the convolution operation. Comma-separated. As in 1,1", + required=False, + ) + parser.add_argument( + "--stride", + type=str, + default="1,1", + help="The stride factor for the convolution operation. Comma-separated. 
As in 1,1", + required=False, + ) + parser.add_argument( + "--requirements", + type=str, + help="Target requirements for this module. Comma-separated. As in -iree-llvmcpu-target-cpu-features. If the target device does not meet all of the requirements, the test will be skipped.", + required=False, + ) + return parser.parse_args() + + +def write_code_file(functions, filename): + with open(filename, "w") as file: + for function in functions.values(): + file.write(function.definition + "\n") + + +def write_calls_file(functions, calls, filename, requirements): + # TODO(scotttodd): write "GENERATED BY" comment to the top of the file + + # Module-level reflection information used to control the test tool. + # TODO(scotttodd): drop this and whatever logic in the test tool used it + # multiple backends should be able to use the same input IR, so the + # input IR shouldn't need things like CPU features in it + reflection = "" + if requirements: + reflection = ( + "iree.reflection = {" + 'target_features = "' + + ",".join([req.lstrip("+") for req in requirements.split(",")]) + + '"' + "}" + ) + module_definition = ( + f"builtin.module @calls attributes {{\n" f" {reflection}\n" f"}} {{\n\n" + ) + + # Declare the custom module that generates arguments. + module_definition = module_definition + ( + "func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n" + "func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n" + ) + + # Declare the functions that will be called. 
+ for function in functions.values(): + module_definition = module_definition + function.import_declaration + "\n" + module_definition = module_definition + "\n" + + # Emit the test cases for each call. + for call in calls: + module_definition = module_definition + call.op + "" + + module_definition = module_definition + "}\n" + + with open(filename, "w") as file: + file.write(module_definition) + + +def main(args): + input_type = InputElemTypeId(args.input_type) + input_layout = InputLayout(args.input_layout) + kernel_type = KernelElemTypeId(args.kernel_type) + kernel_layout = KernelLayout(args.kernel_layout) + acc_type = AccElemTypeId(args.acc_type) + shapes_id = ShapesId(args.shapes) + conv2d_attr = ConvAttrs( + tuple(map(int, args.stride.split(","))), + tuple(map(int, args.dilation.split(","))), + ) + + (functions, calls) = generate( + input_type, + input_layout, + kernel_type, + kernel_layout, + conv2d_attr, + acc_type, + shapes_id, + ) + + write_code_file(functions, args.output_conv2d_mlir) + write_calls_file( + functions, + calls, + args.output_calls_mlir, + args.requirements, + ) + + +if __name__ == "__main__": + main(parse_arguments()) diff --git a/linalg_ops/convolution/generate_test_mlir_files.sh b/linalg_ops/convolution/generate_test_mlir_files.sh new file mode 100755 index 0000000..69aea7d --- /dev/null +++ b/linalg_ops/convolution/generate_test_mlir_files.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# Copyright 2024 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This script runs generate_e2e_conv2d_tests for all argument combinations that +# we are interested in testing. 
+# +# The output is a 'generated' folder with contents like this: +# linalg_ops/ +# convolution/ +# generated/ +# f16_nchw_f16_fchw_f16/ +# conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_large.mlir +# conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_medium.mlir +# conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir +# conv2d_f16_nchw_f16_fchw_f16_small.mlir +# f16_nchw_f16_fchw_f32/ +# conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_large.mlir +# conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_medium.mlir +# conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir +# conv2d_f16_nchw_f16_fchw_f32_small.mlir +# ... +# ... +# Usage: +# generate_test_mlir_files.sh + +set -euo pipefail + +this_dir="$(cd $(dirname $0) && pwd)" +generated_dir_root="${this_dir}/generated" + +# Reset generated directory. +rm -rf ${generated_dir_root?} +mkdir -p ${generated_dir_root?} + +shapes=( + "small" + "medium" + "large" +) + +# input_type;input_layout;kernel_type;kernel_layout;acc_type +type_and_layout_combinations=( + "f16;nhwc;f16;hwcf;f16" + "f16;nchw;f16;fchw;f16" + "f16;nhwc;f16;hwcf;f32" + "f16;nchw;f16;fchw;f32" + "f32;nhwc;f32;hwcf;f32" + "f32;nchw;f32;fchw;f32" + "i8;nhwc;i8;hwcf;i32" + "i8;nchw;i8;fchw;i32" +) + +for type_and_layout_combination in ${type_and_layout_combinations[@]}; do + IFS=";" read -r -a combination <<< "${type_and_layout_combination}" + input_type="${combination[0]}" + input_layout="${combination[1]}" + kernel_type="${combination[2]}" + kernel_layout="${combination[3]}" + acc_type="${combination[4]}" + + type_layout_name="${input_type}_${input_layout}_${kernel_type}_${kernel_layout}_${acc_type}" + type_combination_dir="${generated_dir_root}/${type_layout_name}" + mkdir -p ${type_combination_dir} + + for shape in ${shapes[@]}; do + echo "Generating conv2d test files for ${type_layout_name}_${shape}" + name="conv2d_${type_layout_name}_${shape}" + python 
${this_dir}/generate_e2e_conv2d_tests.py \ + --output_conv2d_mlir=${type_combination_dir}/${name}.mlir \ + --output_calls_mlir=${type_combination_dir}/${name}_calls.mlir \ + --input_type=${input_type} \ + --input_layout=${input_layout} \ + --kernel_type=${kernel_type} \ + --kernel_layout=${kernel_layout} \ + --acc_type=${acc_type} \ + --shapes=${shape} + done +done diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir new file mode 100644 index 0000000..ca13bae --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf16>) -> tensor<2x8x126x126xf16> + return %result: tensor<2x8x126x126xf16> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf16>) -> tensor<2x12x126x126xf16> + return %result: tensor<2x12x126x126xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir new file mode 100644 index 0000000..ee31f04 --- /dev/null +++ 
b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = 
arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy 
= call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir new file mode 100644 index 0000000..b630d29 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> + return %result: tensor<2x2x30x30xf16> +} +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> + return %result: tensor<2x64x30x30xf16> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x16x32x32xf16>, %rhs: tensor<64x16x3x3xf16>, %acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf16>, tensor<64x16x3x3xf16>) outs(%acc: tensor<2x64x30x30xf16>) -> tensor<2x64x30x30xf16> + return %result: tensor<2x64x30x30xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir new file mode 100644 index 0000000..1298b5b --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: 
!hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, 
%acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : 
i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = 
arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 16 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call 
@conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir new file mode 100644 index 0000000..66fe7fd --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small.mlir @@ -0,0 +1,12 @@ +func.func 
@conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> + return %result: tensor<1x1x1x1xf16> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf16>) -> tensor<1x1x15x15xf16> + return %result: tensor<1x1x15x15xf16> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf16>) -> tensor<2x2x30x30xf16> + return %result: tensor<2x2x30x30xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir new file mode 100644 index 0000000..98438c6 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f16/conv2d_f16_nchw_f16_fchw_f16_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view 
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : 
(!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = 
"Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> 
!hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, 
%kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git 
a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir new file mode 100644 index 0000000..21afe9d --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> + return %result: tensor<2x8x126x126xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> + return %result: tensor<2x12x126x126xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir new file mode 100644 index 0000000..34fdff2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private 
@conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + 
%acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir new file mode 100644 index 0000000..0f9d9df --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<64x2x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<64x2x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> 
tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x16x32x32xf16>, %rhs: tensor<64x16x3x3xf16>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf16>, tensor<64x16x3x3xf16>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir new file mode 100644 index 0000000..f8798e9 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> 
!hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, 
%acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : 
i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 16 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 
+ %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir new file mode 100644 index 0000000..f2d0ea0 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func 
@conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x1x16x16xf16>, %rhs: tensor<1x1x2x2xf16>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf16>, tensor<1x1x2x2xf16>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> + return %result: tensor<1x1x15x15xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x2x32x32xf16>, %rhs: tensor<2x2x3x3xf16>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf16>, tensor<2x2x3x3xf16>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir new file mode 100644 index 0000000..5c160c6 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nchw_f16_fchw_f32/conv2d_f16_nchw_f16_fchw_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) 
-> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, 
i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, 
%input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = 
arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, 
%acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir new file mode 100644 index 0000000..17eb9e8 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%lhs: tensor<2x128x128x4xf16>, %rhs: tensor<3x3x4x8xf16>, %acc: tensor<2x126x126x8xf16>) -> 
tensor<2x126x126x8xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf16>, tensor<3x3x4x8xf16>) outs(%acc: tensor<2x126x126x8xf16>) -> tensor<2x126x126x8xf16> + return %result: tensor<2x126x126x8xf16> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%lhs: tensor<2x128x128x3xf16>, %rhs: tensor<3x3x3x12xf16>, %acc: tensor<2x126x126x12xf16>) -> tensor<2x126x126x12xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf16>, tensor<3x3x3x12xf16>) outs(%acc: tensor<2x126x126x12xf16>) -> tensor<2x126x126x12xf16> + return %result: tensor<2x126x126x12xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir new file mode 100644 index 0000000..b07a2e5 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private 
@module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : 
i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 
3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, 
%w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir new file mode 100644 index 0000000..addb8a2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> + return %result: tensor<2x30x30x2xf16> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x64xf16>, %acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x64xf16>) outs(%acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> + return %result: tensor<2x30x30x64xf16> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%lhs: tensor<2x32x32x16xf16>, %rhs: tensor<3x3x16x64xf16>, %acc: tensor<2x30x30x64xf16>) -> tensor<2x30x30x64xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf16>, tensor<3x3x16x64xf16>) outs(%acc: tensor<2x30x30x64xf16>) -> 
tensor<2x30x30x64xf16> + return %result: tensor<2x30x30x64xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir new file mode 100644 index 0000000..17ee9c1 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + 
%input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : 
i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = 
arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = 
arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = 
arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir new file mode 100644 index 0000000..b3bf59e --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf16> + return %result: tensor<1x1x1x1xf16> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%lhs: tensor<1x16x16x1xf16>, %rhs: tensor<2x2x1x1xf16>, %acc: tensor<1x15x15x1xf16>) -> tensor<1x15x15x1xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf16>, tensor<2x2x1x1xf16>) outs(%acc: tensor<1x15x15x1xf16>) -> tensor<1x15x15x1xf16> + return %result: tensor<1x15x15x1xf16> +} +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf16>) -> tensor<2x30x30x2xf16> + return %result: tensor<2x30x30x2xf16> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir new file mode 100644 index 0000000..f50584a --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f16/conv2d_f16_nhwc_f16_hwcf_f16_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func 
@conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, 
%acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : 
i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call 
@conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f16(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir new file mode 100644 index 0000000..2a7b2f2 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x128x128x4xf16>, %rhs: tensor<3x3x4x8xf16>, %acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf16>, tensor<3x3x4x8xf16>) outs(%acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> + return %result: tensor<2x126x126x8xf32> +} +func.func 
@conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x128x128x3xf16>, %rhs: tensor<3x3x3x12xf16>, %acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf16>, tensor<3x3x3x12xf16>) outs(%acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> + return %result: tensor<2x126x126x12xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir new file mode 100644 index 0000000..cd7d928 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = 
arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call 
@module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, 
%kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git 
a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir new file mode 100644 index 0000000..d140187 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%lhs: tensor<2x32x32x16xf16>, %rhs: tensor<3x3x16x64xf16>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf16>, tensor<3x3x16x64xf16>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir 
b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir new file mode 100644 index 0000000..548e7ad --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, 
%input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 
1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, 
i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call 
@conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : 
i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir new file mode 100644 index 0000000..59e9504 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%lhs: tensor<1x1x1x1xf16>, %rhs: tensor<1x1x1x1xf16>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf16>, tensor<1x1x1x1xf16>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%lhs: tensor<1x16x16x1xf16>, %rhs: tensor<2x2x1x1xf16>, %acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf16>, tensor<2x2x1x1xf16>) outs(%acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> + return %result: tensor<1x15x15x1xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%lhs: tensor<2x32x32x2xf16>, %rhs: tensor<3x3x2x2xf16>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations 
= dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf16>, tensor<3x3x2x2xf16>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} diff --git a/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir new file mode 100644 index 0000000..6a9ab15 --- /dev/null +++ b/linalg_ops/convolution/generated/f16_nhwc_f16_hwcf_f32/conv2d_f16_nhwc_f16_hwcf_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + 
%input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = 
arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = 
hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f16_f16_f32(%input, %kernel, 
%acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir new file mode 100644 index 0000000..1714e5b --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x4x128x128xf32>, %rhs: tensor<8x4x3x3xf32>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf32>, tensor<8x4x3x3xf32>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> + return %result: tensor<2x8x126x126xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x3x128x128xf32>, %rhs: tensor<12x3x3x3xf32>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf32>, 
tensor<12x3x3x3xf32>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> + return %result: tensor<2x12x126x126xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir new file mode 100644 index 0000000..ce81bc5 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call 
@conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : 
i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, 
%acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir new file mode 100644 index 0000000..d074f1f --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, 
%rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<64x2x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<64x2x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x16x32x32xf32>, %rhs: tensor<64x16x3x3xf32>, %acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xf32>, tensor<64x16x3x3xf32>) outs(%acc: tensor<2x64x30x30xf32>) -> tensor<2x64x30x30xf32> + return %result: tensor<2x64x30x30xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir new file mode 100644 index 0000000..092a825 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private 
@conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : 
(!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = 
{description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, 
i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 16 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, 
%kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git 
a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir new file mode 100644 index 0000000..a4a08ad --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xf32>, tensor<1x1x2x2xf32>) outs(%acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> + return %result: tensor<1x1x15x15xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x2x32x32xf32>, %rhs: tensor<2x2x3x3xf32>, %acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xf32>, tensor<2x2x3x3xf32>) outs(%acc: tensor<2x2x30x30xf32>) -> tensor<2x2x30x30xf32> + return %result: tensor<2x2x30x30xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir new file mode 100644 index 
0000000..9f01130 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nchw_f32_fchw_f32/conv2d_f32_nchw_f32_fchw_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 
1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, 
%layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 
15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view 
+ %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call 
@conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir new file mode 100644 index 0000000..0cdae51 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%lhs: tensor<2x128x128x4xf32>, %rhs: tensor<3x3x4x8xf32>, %acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf32>, tensor<3x3x4x8xf32>) outs(%acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> + return %result: tensor<2x126x126x8xf32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%lhs: tensor<2x128x128x3xf32>, %rhs: tensor<3x3x3x12xf32>, %acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf32>, tensor<3x3x3x12xf32>) outs(%acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> + return %result: tensor<2x126x126x12xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir new file mode 100644 index 0000000..854a307 --- /dev/null +++ 
b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = 
arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy 
= call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir new file mode 100644 index 0000000..393c487 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x2xf32>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x2xf32>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x64xf32>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x64xf32>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%lhs: tensor<2x32x32x16xf32>, %rhs: tensor<3x3x16x64xf32>, %acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xf32>, tensor<3x3x16x64xf32>) outs(%acc: tensor<2x30x30x64xf32>) -> tensor<2x30x30x64xf32> + return %result: tensor<2x30x30x64xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir new file mode 100644 index 0000000..5043f0d --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: 
!hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, 
%acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : 
i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = 
arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call 
@conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir new file mode 100644 index 0000000..ea9d92c --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small.mlir @@ -0,0 +1,12 @@ +func.func 
@conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%lhs: tensor<1x1x1x1xf32>, %rhs: tensor<1x1x1x1xf32>, %acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) outs(%acc: tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> + return %result: tensor<1x1x1x1xf32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%lhs: tensor<1x16x16x1xf32>, %rhs: tensor<2x2x1x1xf32>, %acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xf32>, tensor<2x2x1x1xf32>) outs(%acc: tensor<1x15x15x1xf32>) -> tensor<1x15x15x1xf32> + return %result: tensor<1x15x15x1xf32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%lhs: tensor<2x32x32x2xf32>, %rhs: tensor<3x3x2x2xf32>, %acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xf32>, tensor<3x3x2x2xf32>) outs(%acc: tensor<2x30x30x2xf32>) -> tensor<2x30x30x2xf32> + return %result: tensor<2x30x30x2xf32> +} diff --git a/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir new file mode 100644 index 0000000..b25c720 --- /dev/null +++ b/linalg_ops/convolution/generated/f32_nhwc_f32_hwcf_f32/conv2d_f32_nhwc_f32_hwcf_f32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view 
+func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : 
(!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = 
"Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> 
!hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, 
%kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_f32_f32_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git 
a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir new file mode 100644 index 0000000..706848a --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%lhs: tensor<2x4x128x128xi8>, %rhs: tensor<8x4x3x3xi8>, %acc: tensor<2x8x126x126xi32>) -> tensor<2x8x126x126xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xi8>, tensor<8x4x3x3xi8>) outs(%acc: tensor<2x8x126x126xi32>) -> tensor<2x8x126x126xi32> + return %result: tensor<2x8x126x126xi32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%lhs: tensor<2x3x128x128xi8>, %rhs: tensor<12x3x3x3xi8>, %acc: tensor<2x12x126x126xi32>) -> tensor<2x12x126x126xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xi8>, tensor<12x3x3x3xi8>) outs(%acc: tensor<2x12x126x126xi32>) -> tensor<2x12x126x126xi32> + return %result: tensor<2x12x126x126xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir new file mode 100644 index 0000000..af106a6 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: 
!hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 4 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 8 : i64 + %kernel_dim1 = arith.constant 4 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 8 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + 
%acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 8 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + 
%input_dim1 = arith.constant 3 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 128 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 12 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 12 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 126 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 12 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 126 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> 
!hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir new file mode 100644 index 0000000..780c670 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<2x2x3x3xi8>, %acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xi8>, tensor<2x2x3x3xi8>) outs(%acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> + return %result: tensor<2x2x30x30xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<64x2x3x3xi8>, %acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xi8>, tensor<64x2x3x3xi8>) outs(%acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> + return %result: tensor<2x64x30x30xi32> 
+} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x16x32x32xi8>, %rhs: tensor<64x16x3x3xi8>, %acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x16x32x32xi8>, tensor<64x16x3x3xi8>) outs(%acc: tensor<2x64x30x30xi32>) -> tensor<2x64x30x30xi32> + return %result: tensor<2x64x30x30xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir new file mode 100644 index 0000000..8c4dc85 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func 
@conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, 
%acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 
6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, 
!hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 64 : i64 + %kernel_dim1 = arith.constant 16 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 64 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 64 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call 
@conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir new file mode 100644 index 0000000..8acc310 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%lhs: tensor<1x1x1x1xi8>, %rhs: tensor<1x1x1x1xi8>, %acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xi8>, tensor<1x1x1x1xi8>) outs(%acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> + return %result: tensor<1x1x1x1xi32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%lhs: tensor<1x1x16x16xi8>, 
%rhs: tensor<1x1x2x2xi8>, %acc: tensor<1x1x15x15xi32>) -> tensor<1x1x15x15xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x16x16xi8>, tensor<1x1x2x2xi8>) outs(%acc: tensor<1x1x15x15xi32>) -> tensor<1x1x15x15xi32> + return %result: tensor<1x1x15x15xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x2x32x32xi8>, %rhs: tensor<2x2x3x3xi8>, %acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> { + %result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x2x32x32xi8>, tensor<2x2x3x3xi8>) outs(%acc: tensor<2x2x30x30xi32>) -> tensor<2x2x30x30xi32> + return %result: tensor<2x2x30x30xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir new file mode 100644 index 0000000..6f68fe9 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nchw_i8_fchw_i32/conv2d_i8_nchw_i8_fchw_i32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input: 
!hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + 
%acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, 
i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 15 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 15 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call 
@conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 2 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 32 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 3 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 2 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 30 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + 
%acc_copy_dim1 = arith.constant 2 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 30 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 0 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir new file mode 100644 index 0000000..99911df --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large.mlir @@ -0,0 +1,8 @@ +func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%lhs: tensor<2x128x128x4xi8>, %rhs: tensor<3x3x4x8xi8>, %acc: tensor<2x126x126x8xi32>) -> tensor<2x126x126x8xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} 
ins(%lhs, %rhs: tensor<2x128x128x4xi8>, tensor<3x3x4x8xi8>) outs(%acc: tensor<2x126x126x8xi32>) -> tensor<2x126x126x8xi32> + return %result: tensor<2x126x126x8xi32> +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%lhs: tensor<2x128x128x3xi8>, %rhs: tensor<3x3x3x12xi8>, %acc: tensor<2x126x126x12xi32>) -> tensor<2x126x126x12xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xi8>, tensor<3x3x3x12xi8>) outs(%acc: tensor<2x126x126x12xi32>) -> tensor<2x126x126x12xi32> + return %result: tensor<2x126x126x12xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir new file mode 100644 index 0000000..a863eca --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_large_calls.mlir @@ -0,0 +1,108 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func 
@conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32_2_4_128_128_8_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 4 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 4 : i64 + %kernel_dim3 = arith.constant 8 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 8 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 8 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, 
%acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 4 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 8 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32_2_3_128_128_12_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 128 : i64 + %input_dim2 = arith.constant 128 : i64 + %input_dim3 = arith.constant 3 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 3 : i64 + %kernel_dim3 = arith.constant 12 : i64 + %kernel_element_type = hal.element_type : i32 + 
%kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 126 : i64 + %acc_dim2 = arith.constant 126 : i64 + %acc_dim3 = arith.constant 12 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 126 : i64 + %acc_copy_dim2 = arith.constant 126 : i64 + %acc_copy_dim3 = arith.constant 12 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 3 : i64 + %h = arith.constant 128 : i64 + %w = arith.constant 128 : i64 + %f = arith.constant 12 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, 
!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir new file mode 100644 index 0000000..e64bc66 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x2xi8>, %acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x2xi8>) outs(%acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> + return %result: tensor<2x30x30x2xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> + return %result: tensor<2x30x30x64xi32> +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%lhs: tensor<2x32x32x16xi8>, %rhs: tensor<3x3x16x64xi8>, %acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x32x32x16xi8>, tensor<3x3x16x64xi8>) outs(%acc: tensor<2x30x30x64xi32>) -> tensor<2x30x30x64xi32> + return %result: tensor<2x30x30x64xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir 
b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir new file mode 100644 index 0000000..ea12edb --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_medium_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, 
%input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = 
arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32_2_2_32_32_64_3_3_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> 
!hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32_2_16_32_32_64_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x16x32x32x64x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 16 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, 
%input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 16 : i64 + %kernel_dim3 = arith.constant 64 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 64 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 64 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_16_32_32_times_3_3_64_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 2 : i64 + %c = arith.constant 16 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 64 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = 
arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir new file mode 100644 index 0000000..5d52f93 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small.mlir @@ -0,0 +1,12 @@ +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%lhs: tensor<1x1x1x1xi8>, %rhs: tensor<1x1x1x1xi8>, %acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x1x1x1xi8>, tensor<1x1x1x1xi8>) outs(%acc: tensor<1x1x1x1xi32>) -> tensor<1x1x1x1xi32> + return %result: tensor<1x1x1x1xi32> +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%lhs: tensor<1x16x16x1xi8>, %rhs: tensor<2x2x1x1xi8>, %acc: tensor<1x15x15x1xi32>) -> tensor<1x15x15x1xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<1x16x16x1xi8>, tensor<2x2x1x1xi8>) outs(%acc: tensor<1x15x15x1xi32>) -> tensor<1x15x15x1xi32> + return %result: tensor<1x15x15x1xi32> +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%lhs: tensor<2x32x32x2xi8>, %rhs: tensor<3x3x2x2xi8>, %acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> { + %result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} 
ins(%lhs, %rhs: tensor<2x32x32x2xi8>, tensor<3x3x2x2xi8>) outs(%acc: tensor<2x30x30x2xi32>) -> tensor<2x30x30x2xi32> + return %result: tensor<2x30x30x2xi32> +} diff --git a/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir new file mode 100644 index 0000000..da9b803 --- /dev/null +++ b/linalg_ops/convolution/generated/i8_nhwc_i8_hwcf_i32/conv2d_i8_nhwc_i8_hwcf_i32_small_calls.mlir @@ -0,0 +1,158 @@ +builtin.module @calls attributes { + +} { + +func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view +func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) +func.func private @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view +func.func private @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view + +func.func @conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32_1_1_1_1_1_1_1_acc_0() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x1x1x1x1x1"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 1 : i64 + %input_dim2 = 
arith.constant 1 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 2 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 1 : i64 + %kernel_dim1 = arith.constant 1 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 3 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 1 : i64 + %acc_dim2 = arith.constant 1 : i64 + %acc_dim3 = arith.constant 1 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 4 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 1 : i64 + %acc_copy_dim2 = arith.constant 1 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 4 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_1_1_times_1_1_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + 
%h = arith.constant 1 : i64 + %w = arith.constant 1 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 1 : i64 + %kw = arith.constant 1 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32_1_1_16_16_1_2_2_acc_1() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 1x1x16x16x1x2x2"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 1 : i64 + %input_dim1 = arith.constant 16 : i64 + %input_dim2 = arith.constant 16 : i64 + %input_dim3 = arith.constant 1 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 5 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 2 : i64 + %kernel_dim1 = arith.constant 2 : i64 + %kernel_dim2 = arith.constant 1 : i64 + %kernel_dim3 = arith.constant 1 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 6 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 1 : i64 + %acc_dim1 = arith.constant 15 : i64 + %acc_dim2 = arith.constant 15 : i64 + %acc_dim3 = arith.constant 1 : i64 + 
%acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 7 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 1 : i64 + %acc_copy_dim1 = arith.constant 15 : i64 + %acc_copy_dim2 = arith.constant 15 : i64 + %acc_copy_dim3 = arith.constant 1 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 7 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_1_1_16_16_times_2_2_1_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = arith.constant 1 : i64 + %c = arith.constant 1 : i64 + %h = arith.constant 16 : i64 + %w = arith.constant 16 : i64 + %f = arith.constant 1 : i64 + %kh = arith.constant 2 : i64 + %kw = arith.constant 2 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +func.func @conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32_2_2_32_32_2_3_3_acc_2() attributes { + iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x2x32x32x2x3x3"} +} { + %device_index = arith.constant 0 : index + %device = hal.devices.get %device_index : !hal.device + %input_dim0 = arith.constant 2 : i64 + %input_dim1 = 
arith.constant 32 : i64 + %input_dim2 = arith.constant 32 : i64 + %input_dim3 = arith.constant 2 : i64 + %input_element_type = hal.element_type : i32 + %input_seed = arith.constant 8 : i32 + %input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %kernel_dim0 = arith.constant 3 : i64 + %kernel_dim1 = arith.constant 3 : i64 + %kernel_dim2 = arith.constant 2 : i64 + %kernel_dim3 = arith.constant 2 : i64 + %kernel_element_type = hal.element_type : i32 + %kernel_seed = arith.constant 9 : i32 + %kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_dim0 = arith.constant 2 : i64 + %acc_dim1 = arith.constant 30 : i64 + %acc_dim2 = arith.constant 30 : i64 + %acc_dim3 = arith.constant 2 : i64 + %acc_element_type = hal.element_type : i32 + %acc_seed = arith.constant 10 : i32 + %acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %acc_copy_dim0 = arith.constant 2 : i64 + %acc_copy_dim1 = arith.constant 30 : i64 + %acc_copy_dim2 = arith.constant 30 : i64 + %acc_copy_dim3 = arith.constant 2 : i64 + %acc_copy_element_type = hal.element_type : i32 + %acc_copy_seed = arith.constant 10 : i32 + %acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view + %result = call @module.conv2d_accumulate_2_2_32_32_times_3_3_2_dtype_i8_i8_i32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view + %n = 
arith.constant 2 : i64 + %c = arith.constant 2 : i64 + %h = arith.constant 32 : i64 + %w = arith.constant 32 : i64 + %f = arith.constant 2 : i64 + %kh = arith.constant 3 : i64 + %kw = arith.constant 3 : i64 + %layout = arith.constant 1 : i64 + %sh = arith.constant 1 : i64 + %sw = arith.constant 1 : i64 + %dh = arith.constant 1 : i64 + %dw = arith.constant 1 : i64 + call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> () + return +} +} diff --git a/linalg_ops/iree-e2e-conv2d-test.cc b/linalg_ops/iree-e2e-conv2d-test.cc new file mode 100644 index 0000000..a8d2391 --- /dev/null +++ b/linalg_ops/iree-e2e-conv2d-test.cc @@ -0,0 +1,777 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include "iree/base/api.h" +#include "iree/base/internal/flags.h" +#include "iree/base/internal/math.h" +#include "iree/hal/api.h" +#include "iree/modules/hal/module.h" +#include "iree/tooling/context_util.h" +#include "iree/tooling/device_util.h" +#include "iree/vm/api.h" +#include "iree/vm/native_module_cc.h" +#include "test_utils.h" + +//===----------------------------------------------------------------------===// +// Reference conv2d (NCHW-FCHW) and (NHWC-HWCF) +//===----------------------------------------------------------------------===// + +// Conversion from 4D indices in row major order to 1D index. 
+static int convert_to_1d_index(iree_hal_dim_t channels, iree_hal_dim_t height, + iree_hal_dim_t width, iree_hal_dim_t n, + iree_hal_dim_t c, iree_hal_dim_t h, + iree_hal_dim_t w) { + return n * (channels * height * width) + c * (height * width) + h * width + w; +} + +// [f16 <= f16 * f16 + f16] +static void reference_conv2d_f16_f16_f16_f16( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t *input_data, + const uint16_t *kernel_data, const uint16_t *acc_data, + uint16_t *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, + iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + float acc = acc_data ? iree_math_f16_to_f32(acc_data[out_idx]) : 0.f; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + result_data[out_idx] = iree_math_f32_to_f16(acc); + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + float acc = acc_data ? 
iree_math_f16_to_f32(acc_data[out_idx]) : 0.f; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + } + result_data[out_idx] = iree_math_f32_to_f16(acc); + } +} + +// [f32 <= f16 * f16 + f32] +static void reference_conv2d_f16_f16_f32_f32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const uint16_t *input_data, + const uint16_t *kernel_data, const float *acc_data, float *result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + float acc = acc_data ? 
acc_data[out_idx] : 0.f; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + float acc = acc_data ? acc_data[out_idx] : 0.f; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + acc += iree_math_f16_to_f32(input_data[inp_idx]) * + iree_math_f16_to_f32(kernel_data[krnl_idx]); + } + } + } + result_data[out_idx] = acc; + } +} + +// [i32 <= i8 * i8 + i32] +static void reference_conv2d_i8_i8_i32_i32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const int8_t *input_data, + const int8_t *kernel_data, const int32_t *acc_data, int32_t *result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + 
iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + int32_t acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + int8_t lhs_value = input_data[inp_idx]; + int8_t rhs_value = kernel_data[krnl_idx]; + acc += (int32_t)lhs_value * (int32_t)rhs_value; + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + int32_t acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + + int8_t lhs_value = input_data[inp_idx]; + int8_t rhs_value = kernel_data[krnl_idx]; + acc += (int32_t)lhs_value * (int32_t)rhs_value; + } + } + } + result_data[out_idx] = acc; + } +} + +static void reference_conv2d_f32_f32_f32_f32( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, const float *input_data, + const float *kernel_data, const float *acc_data, float 
*result_data, + iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (layout == 0) { + // The layout of output tensor is NxfxOHxOW + iree_hal_dim_t out_idx = + convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); + + float acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size)); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); + + acc += input_data[inp_idx] * kernel_data[krnl_idx]; + } + } + result_data[out_idx] = acc; + } + } else if (layout == 1) { + // The layout of output tensor is NxOHxOWxf + iree_hal_dim_t out_idx = + convert_to_1d_index(oh_size, ow_size, f_size, n, oh, ow, oc); + + float acc = acc_data ? acc_data[out_idx] : 0; + + for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { + for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { + for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { + iree_hal_dim_t inp_idx = convert_to_1d_index( + h_size, w_size, c_size, n, (oh * sh_size + kh * dh_size), + (ow * sw_size + kw * dw_size), ic); + iree_hal_dim_t krnl_idx = + convert_to_1d_index(kw_size, c_size, f_size, kh, kw, ic, oc); + acc += input_data[inp_idx] * kernel_data[krnl_idx]; + } + } + } + result_data[out_idx] = acc; + } +} + +// Helper for reference_conv2d. 
+static iree_status_t reference_conv2d_element( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_dim_t oh_size, iree_hal_dim_t ow_size, + iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type, + iree_hal_element_type_t acc_type, void *input_data, void *kernel_data, + void *acc_data, void *result_data, iree_hal_dim_t n, iree_hal_dim_t oc, + iree_hal_dim_t oh, iree_hal_dim_t ow) { + if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && + kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && + acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { + reference_conv2d_f32_f32_f32_f32( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, + sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, + (const float *)input_data, (const float *)kernel_data, + (const float *)acc_data, (float *)result_data, n, oc, oh, ow); + } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && + kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && + acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) { + reference_conv2d_f16_f16_f16_f16( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, + sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, + (const uint16_t *)input_data, (const uint16_t *)kernel_data, + (const uint16_t *)acc_data, (uint16_t *)result_data, n, oc, oh, ow); + } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && + kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && + acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { + reference_conv2d_f16_f16_f32_f32( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, + sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, + (const uint16_t *)input_data, (const uint16_t *)kernel_data, + (const float *)acc_data, (float *)result_data, n, oc, oh, ow); + } else 
if (input_type == IREE_HAL_ELEMENT_TYPE_INT_8 && + kernel_type == IREE_HAL_ELEMENT_TYPE_INT_8 && + acc_type == IREE_HAL_ELEMENT_TYPE_INT_32) { + reference_conv2d_i8_i8_i32_i32( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, layout, + sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, + (const int8_t *)input_data, (const int8_t *)kernel_data, + (const int32_t *)acc_data, (int32_t *)result_data, n, oc, oh, ow); + } else { + return iree_make_status( + IREE_STATUS_INVALID_ARGUMENT, + "unhandled combination of element types in conv2d input_type: %d," + " kernel_type: %d, acc_type: %d", + input_type, kernel_type, acc_type); + } + return iree_ok_status(); +} + +// Calculate the output shape given the dilation and strides. +static iree_hal_dim_t out_shape_calc(iree_hal_dim_t i_shape, + iree_hal_dim_t k_shape, + iree_hal_dim_t stride, + iree_hal_dim_t dilation) { + iree_hal_dim_t x = (k_shape - 1) * (dilation - 1); + x = i_shape - k_shape - x; + return floor(x / stride) + 1; +} + +// Reference conv2d-NCHW-FCHW implementation, used to compare conv2d results +// against. 
+static iree_status_t reference_conv2d( + iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, + iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, + iree_hal_dim_t kw_size, iree_hal_dim_t layout, iree_hal_dim_t sh_size, + iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, + iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type, + iree_hal_element_type_t acc_type, iree_byte_span_t input_contents, + iree_byte_span_t kernel_contents, iree_byte_span_t acc_contents, + iree_byte_span_t result_contents, int compute_every) { + IREE_TRACE_ZONE_BEGIN(z0); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, n_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, c_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, h_size); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, w_size); + + iree_hal_dim_t oh_size = out_shape_calc(h_size, kh_size, sh_size, dh_size); + iree_hal_dim_t ow_size = out_shape_calc(w_size, kw_size, sw_size, dw_size); + + if (layout == 0) { + for (iree_hal_dim_t n = 0; n < n_size; ++n) { + for (iree_hal_dim_t oc = 0; oc < f_size; ++oc) { + for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { + for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + reference_conv2d_element( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, + layout, sh_size, sw_size, dh_size, dw_size, oh_size, + ow_size, input_type, kernel_type, acc_type, + input_contents.data, kernel_contents.data, + acc_contents.data, result_contents.data, n, oc, oh, ow)); + } + } + } + } + } else if (layout == 1) { + for (iree_hal_dim_t n = 0; n < n_size; ++n) { + for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { + for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { + for (iree_hal_dim_t oc = 0; oc < f_size; ++oc) { + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + reference_conv2d_element( + n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, + layout, sh_size, sw_size, dh_size, dw_size, oh_size, + ow_size, input_type, 
kernel_type, acc_type, + input_contents.data, kernel_contents.data, + acc_contents.data, result_contents.data, n, oc, oh, ow)); + } + } + } + } + } else { + return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, + "unhandled conv2d layout"); + } + + IREE_TRACE_ZONE_END(z0); + return iree_ok_status(); +} + +//===----------------------------------------------------------------------===// +// Conv2d comparison/logging +//===----------------------------------------------------------------------===// + +typedef struct { + iree_allocator_t host_allocator; + iree_hal_dim_t n; // batch dim + iree_hal_dim_t c; // input channels + iree_hal_dim_t h; // input height + iree_hal_dim_t w; // input width + iree_hal_dim_t f; // output channels + iree_hal_dim_t kh; // kernel height + iree_hal_dim_t kw; // kernel width + iree_hal_dim_t layout; // conv layout, 0 : nchwxfchw (default); 1: nhwcxhwcf + iree_hal_dim_t sh; // stride along height dim + iree_hal_dim_t sw; // stride along width dim + iree_hal_dim_t dh; // dilation along height dim + iree_hal_dim_t dw; // dilation along width dim + iree_hal_element_type_t input_type; + iree_hal_element_type_t kernel_type; + iree_hal_element_type_t acc_type; + iree_hal_element_type_t result_type; + iree_byte_span_t input_contents; + iree_byte_span_t kernel_contents; + iree_byte_span_t acc_contents; + iree_byte_span_t actual_contents; + iree_byte_span_t expected_contents; +} conv2d_results_t; + +static void conv2d_results_deinitialize(conv2d_results_t *results); + +static iree_status_t conv2d_results_initialize( + iree_hal_device_t *device, iree_hal_dim_t n_size, iree_hal_dim_t c_size, + iree_hal_dim_t h_size, iree_hal_dim_t w_size, iree_hal_dim_t f_size, + iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t layout, + iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, + iree_hal_dim_t dw_size, iree_hal_buffer_view_t *input, + iree_hal_buffer_view_t *kernel, iree_hal_buffer_view_t *acc, + iree_hal_buffer_view_t 
*result, iree_allocator_t host_allocator, + conv2d_results_t *out_results) { + IREE_TRACE_ZONE_BEGIN(z0); + + memset(out_results, 0, sizeof(*out_results)); + out_results->host_allocator = host_allocator; + + out_results->n = n_size; + out_results->c = c_size; + out_results->h = h_size; + out_results->w = w_size; + out_results->f = f_size; + out_results->kh = kh_size; + out_results->kw = kw_size; + out_results->layout = layout; + out_results->sh = sh_size; + out_results->sw = sw_size; + out_results->dh = dh_size; + out_results->dw = dw_size; + + out_results->input_type = iree_hal_buffer_view_element_type(input); + out_results->kernel_type = iree_hal_buffer_view_element_type(kernel); + out_results->acc_type = iree_hal_buffer_view_element_type(acc); + out_results->result_type = iree_hal_buffer_view_element_type(result); + + iree_hal_buffer_t *input_buffer = iree_hal_buffer_view_buffer(input); + iree_hal_buffer_t *kernel_buffer = iree_hal_buffer_view_buffer(kernel); + iree_hal_buffer_t *acc_buffer = acc ? 
iree_hal_buffer_view_buffer(acc) : NULL; + iree_hal_buffer_t *result_buffer = iree_hal_buffer_view_buffer(result); + + iree_status_t status = iree_ok_status(); + + if (iree_status_is_ok(status)) { + out_results->input_contents.data_length = + iree_hal_buffer_byte_length(input_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->input_contents.data_length, + (void **)&out_results->input_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, input_buffer, 0, out_results->input_contents.data, + out_results->input_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + + if (iree_status_is_ok(status)) { + out_results->kernel_contents.data_length = + iree_hal_buffer_byte_length(kernel_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->kernel_contents.data_length, + (void **)&out_results->kernel_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, kernel_buffer, 0, out_results->kernel_contents.data, + out_results->kernel_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + + if (acc_buffer) { + if (iree_status_is_ok(status)) { + out_results->acc_contents.data_length = + iree_hal_buffer_byte_length(acc_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->acc_contents.data_length, + (void **)&out_results->acc_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, acc_buffer, 0, out_results->acc_contents.data, + out_results->acc_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + } + + if (iree_status_is_ok(status)) { + out_results->actual_contents.data_length = + iree_hal_buffer_byte_length(result_buffer); + status = iree_allocator_malloc(host_allocator, + out_results->actual_contents.data_length, + (void 
**)&out_results->actual_contents.data); + } + if (iree_status_is_ok(status)) { + status = iree_hal_device_transfer_d2h( + device, result_buffer, 0, out_results->actual_contents.data, + out_results->actual_contents.data_length, + IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); + } + + if (iree_status_is_ok(status)) { + out_results->expected_contents.data_length = + iree_hal_buffer_byte_length(result_buffer); + status = iree_allocator_malloc( + host_allocator, out_results->expected_contents.data_length, + (void **)&out_results->expected_contents.data); + } + + if (!iree_status_is_ok(status)) { + conv2d_results_deinitialize(out_results); + } + IREE_TRACE_ZONE_END(z0); + return status; +} + +static void conv2d_results_deinitialize(conv2d_results_t *results) { + IREE_TRACE_ZONE_BEGIN(z0); + + iree_allocator_free(results->host_allocator, results->input_contents.data); + iree_allocator_free(results->host_allocator, results->kernel_contents.data); + if (!iree_byte_span_is_empty(results->acc_contents)) { + iree_allocator_free(results->host_allocator, results->acc_contents.data); + } + iree_allocator_free(results->host_allocator, results->actual_contents.data); + iree_allocator_free(results->host_allocator, results->expected_contents.data); + + IREE_TRACE_ZONE_END(z0); +} + +// Helper for check_conv2d: the actual interesting part once we've +// obtained and validated the {n, f, oh, ow}_size values. On error, the first +// index is returned where the actual and expected value doesn't match. TODO: +// Add detailed logging to |file|. 
+static iree_status_t check_conv2d_results_impl(FILE *file, + const conv2d_results_t *results, + int check_every) { + IREE_TRACE_ZONE_BEGIN(z0); + + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + reference_conv2d( + results->n, results->c, results->h, results->w, results->f, + results->kh, results->kw, results->layout, results->sh, results->sw, + results->dh, results->dw, results->input_type, results->kernel_type, + results->acc_type, results->input_contents, results->kernel_contents, + results->acc_contents, results->expected_contents, check_every)); + + int count = 0; + + iree_hal_dim_t oh_size = + out_shape_calc(results->h, results->kh, results->sh, results->dh); + iree_hal_dim_t ow_size = + out_shape_calc(results->w, results->kw, results->sw, results->dw); + + for (iree_hal_dim_t n = 0; n < results->n; ++n) { + for (iree_hal_dim_t oc = 0; oc < results->f; ++oc) { + for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { + for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { + if (++count < check_every) + continue; + count = 0; + iree_hal_dim_t idx = + convert_to_1d_index(results->f, oh_size, ow_size, n, oc, oh, ow); + iree_test_utils_e2e_value_t actual_value = + iree_test_utils_read_buffer_element( + idx, results->result_type, results->actual_contents.data); + iree_test_utils_e2e_value_t expected_value = + iree_test_utils_read_buffer_element( + idx, results->result_type, results->expected_contents.data); + if (!iree_test_utils_result_elements_agree(actual_value, + expected_value)) { + fprintf( + file, + "\n\nerror: the actual and expected result tensors disagree " + "at n %" PRIdim ", oc %" PRIdim ", oh %" PRIdim ", ow %" PRIdim + ".\n\n", + n, oc, oh, ow); + IREE_TRACE_ZONE_END(z0); + return iree_make_status(IREE_STATUS_ABORTED); + } + } + } + } + } + + IREE_TRACE_ZONE_END(z0); + return iree_ok_status(); +} + +// Given an actual conv2d's inputs and output (all host-local), uses a +// reference conv2d implementation on the same inputs to check if the output +// is correct. 
On error, the first index is returned where the actual and +// expected value doesn't match. TODO: Add detailed logging to |file|. +static iree_status_t check_conv2d_results(FILE *file, + const conv2d_results_t *results) { + IREE_TRACE_ZONE_BEGIN(z0); + // TODO: Increase the check every param to reduce the number of comparisons. + int check_every = 1; + iree_status_t status = check_conv2d_results_impl(file, results, check_every); + if (!iree_status_is_ok(status) && check_every > 1) { + // If we got a failure with check_every>1, that didn't log a useful + // numerical summary, as most of the reference tensor entries hadn't been + // computed. Rerun now with check_every=1 to get that numerical logging. + iree_status_ignore(status); + status = check_conv2d_results_impl(file, results, 1); + } + IREE_TRACE_ZONE_END(z0); + return status; +} + +//===----------------------------------------------------------------------===// +// `conv2d_test` custom module +//===----------------------------------------------------------------------===// +// This uses the C++ wrapper to keep things simple. Though easier to use it's +// got additional overhead/code-size bloat that doesn't matter in a test like +// this. Making a C module builder API that removes the boilerplate there is +// TBD so this file is written in C besides this module so that we can swap it +// back to being pure C in the future. + +namespace iree { + +class Conv2dTestModuleState final { +public: + explicit Conv2dTestModuleState(iree_allocator_t host_allocator) + : host_allocator_(host_allocator) {} + ~Conv2dTestModuleState() = default; + + // Fills the destination span with pseudorandom values of the given + // |element_type|. The given |seed| is passed to the pseudorandom generator. + // The pseudorandom values are reproducible both across runs and across + // machines. 
+ StatusOr> + GenerateRandom4dTensor(const vm::ref device, int64_t dim0, + int64_t dim1, int64_t dim2, int64_t dim3, + iree_hal_element_type_t element_type, int32_t seed) { + iree_hal_dim_t dims[4] = { + (iree_hal_dim_t)dim0, + (iree_hal_dim_t)dim1, + (iree_hal_dim_t)dim2, + (iree_hal_dim_t)dim3, + }; + iree_hal_buffer_params_t buffer_params = {0}; + buffer_params.usage = IREE_HAL_BUFFER_USAGE_DEFAULT; + buffer_params.access = IREE_HAL_MEMORY_ACCESS_ALL; + buffer_params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE; + vm::ref result_view; + struct callback_state_t { + iree_hal_element_type_t element_type; + int32_t seed; + } callback_state = { + element_type, + seed, + }; + IREE_RETURN_IF_ERROR(iree_hal_buffer_view_generate_buffer( + device.get(), iree_hal_device_allocator(device.get()), + IREE_ARRAYSIZE(dims), dims, element_type, + IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params, + +[](iree_hal_buffer_mapping_t *mapping, void *user_data) { + callback_state_t callback_state = *(callback_state_t *)user_data; + iree_byte_span_t span = mapping->contents; + // Generate "uniform" integer-valued numbers in the range [min, + // max]. 
+ int32_t min = 0; + int32_t max = 0; + iree_test_utils_get_min_max_for_element_type( + callback_state.element_type, &min, &max); + // divided by 4 to make numerical behavior more stable + uint32_t range = (max - min + 1) / 4; + iree_host_size_t element_byte_count = + iree_hal_element_dense_byte_count(callback_state.element_type); + uint8_t *data_end = span.data + span.data_length; + uint32_t state = callback_state.seed; + for (uint8_t *data = span.data; data < data_end; + data += element_byte_count) { + int32_t value = + (int32_t)iree_test_utils_pseudorandom_range(&state, range) + + min; + iree_test_utils_write_element(callback_state.element_type, value, + data); + } + return iree_ok_status(); + }, + &callback_state, &result_view)); + return std::move(result_view); + } + + Status + CheckConv2dResults(const vm::ref device, int64_t n, + int64_t c, int64_t h, int64_t w, int64_t f, int64_t kh, + int64_t kw, int64_t layout, int64_t sh, int64_t sw, + int64_t dh, int64_t dw, + const vm::ref input, + const vm::ref kernel, + const vm::ref acc, + const vm::ref actual_result) { + conv2d_results_t results = {}; + IREE_RETURN_IF_ERROR(conv2d_results_initialize( + device.get(), (iree_hal_dim_t)n, (iree_hal_dim_t)c, (iree_hal_dim_t)h, + (iree_hal_dim_t)w, (iree_hal_dim_t)f, (iree_hal_dim_t)kh, + (iree_hal_dim_t)kw, (iree_hal_dim_t)layout, (iree_hal_dim_t)sh, + (iree_hal_dim_t)sw, (iree_hal_dim_t)dh, (iree_hal_dim_t)dw, input.get(), + kernel.get(), acc.get(), actual_result.get(), host_allocator_, + &results)); + iree_status_t status = check_conv2d_results(stderr, &results); + conv2d_results_deinitialize(&results); + return status; + } + +private: + iree_allocator_t host_allocator_; +}; + +static const vm::NativeFunction + kConv2dTestModuleFunctions[] = { + vm::MakeNativeFunction("generate_random_tensor", + &Conv2dTestModuleState::GenerateRandom4dTensor), + vm::MakeNativeFunction("check_conv2d_results", + &Conv2dTestModuleState::CheckConv2dResults), +}; + +struct Conv2dTestModule 
final : public vm::NativeModule { + using vm::NativeModule::NativeModule; + StatusOr> + CreateState(iree_allocator_t host_allocator) override { + return std::make_unique(host_allocator); + } +}; + +} // namespace iree + +static iree_status_t conv2d_test_module_create(iree_vm_instance_t *instance, + iree_allocator_t host_allocator, + iree_vm_module_t **out_module) { + IREE_ASSERT_ARGUMENT(out_module); + *out_module = NULL; + auto module = std::make_unique( + "conv2d_test", /*version=*/0, instance, host_allocator, + iree::span>( + iree::kConv2dTestModuleFunctions)); + *out_module = module.release()->interface(); + return iree_ok_status(); +} + +int main(int argc, char **argv) { + IREE_TRACE_APP_ENTER(); + + iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv); + if (argc != 1) { + fprintf(stderr, "use --module= flags to specify the modules to run\n"); + IREE_TRACE_APP_EXIT(EXIT_FAILURE); + return EXIT_FAILURE; + } + + iree_status_t status = iree_test_utils_load_and_run_e2e_tests( + iree_allocator_system(), conv2d_test_module_create); + int exit_code = EXIT_SUCCESS; + if (!iree_status_is_ok(status)) { + iree_status_fprint(stderr, status); + bool is_unavailable = iree_status_is_unavailable(status); + iree_status_free(status); + exit_code = is_unavailable ? EXIT_SUCCESS : EXIT_FAILURE; + } + + IREE_TRACE_APP_EXIT(exit_code); + return exit_code; +} diff --git a/linalg_ops/test_utils.c b/linalg_ops/test_utils.c index 8b8aecd..9762861 100644 --- a/linalg_ops/test_utils.c +++ b/linalg_ops/test_utils.c @@ -194,7 +194,7 @@ bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected, // `require_exact_results` flag is set to `false`. 
case IREE_TEST_UTILS_VALUE_TYPE_F16: if (actual.f16_u16 == expected.f16_u16) return true; - if (iree_test_utils_max_elements_to_check()) return false; + if (iree_test_utils_require_exact_results()) return false; return fabsf(iree_math_f16_to_f32(actual.f16_u16) - iree_math_f16_to_f32(expected.f16_u16)) < acceptable_fp_delta;