Skip to content

Commit

Permalink
[LinalgExt] Add iree_linalg_ext.im2col op and verifier (#17644)
Browse files Browse the repository at this point in the history
This PR adds a new iree_linalg_ext.im2col op representing the im2col
transformation for convolutions. The PR contains the op definition and
the verifier.

---------

Signed-off-by: Max Dawkins <[email protected]>
  • Loading branch information
Max191 authored Jun 24, 2024
1 parent e41e71c commit 1f69b85
Show file tree
Hide file tree
Showing 5 changed files with 450 additions and 15 deletions.
177 changes: 162 additions & 15 deletions compiler/src/iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
Expand Down Expand Up @@ -624,24 +625,30 @@ areNotFullTiles(ArrayRef<int64_t> inputShape,
return false;
}

/// Combine a list of static values with their dynamic replacements into a
/// single list of OpFoldResults. Static entries are materialized as
/// IndexAttrs via a builder created on `context`; dynamic entries come from
/// `dynamicValues` in order.
static SmallVector<OpFoldResult> getMixedValues(MLIRContext *context,
                                                ArrayRef<int64_t> staticValues,
                                                OperandRange dynamicValues) {
  OpBuilder builder(context);
  return mlir::getMixedValues(staticValues, dynamicValues, builder);
}

/// Extract the static `int64_t` values from `mixedValues`. Any dynamic
/// (Value-backed) entry is replaced by the `ShapedType::kDynamic` sentinel at
/// its position in the returned vector.
//
// Takes an ArrayRef instead of a by-value SmallVector to avoid copying the
// caller's vector; dispatchIndexOpFoldResults only needs a read-only view.
static SmallVector<int64_t>
getStaticValues(ArrayRef<OpFoldResult> mixedValues) {
  SmallVector<Value> dynamicValues;
  SmallVector<int64_t> staticValues;
  dispatchIndexOpFoldResults(mixedValues, dynamicValues, staticValues);
  return staticValues;
}

/// Utility function shared between Pack and UnPack to get the tile sizes as
/// OpFoldResults.
// TODO: interface or base class in .td
template <typename OpTy>
static SmallVector<OpFoldResult> getMixedTiles(OpTy op) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  // Delegate to the shared helper: static tile sizes become IndexAttrs and
  // dynamic ones resolve to the op's `inner_tiles` SSA operands, in order.
  return LinalgExt::getMixedValues(op.getContext(), op.getStaticInnerTiles(),
                                   op.getInnerTiles());
}

/// Return the tile sizes as `int64_t`. If a tile size is dynamic a sentinel
Expand All @@ -650,10 +657,7 @@ template <typename OpTy>
static SmallVector<int64_t> getStaticTiles(OpTy op) {
static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
"applies to only pack or unpack operations");
SmallVector<Value> dynamicTiles;
SmallVector<int64_t> staticTiles;
dispatchIndexOpFoldResults(op.getMixedTiles(), dynamicTiles, staticTiles);
return staticTiles;
return getStaticValues(op.getMixedTiles());
}

/// Utility function shared between Pack and UnPack to get a map between
Expand Down Expand Up @@ -1502,6 +1506,148 @@ SmallVector<AffineMap> OnlineAttentionOp::getIndexingMapsArray() {
getIndexingMaps().getAsValueRange<AffineMapAttr>());
}

//===----------------------------------------------------------------------===//
// Im2colOp
//===----------------------------------------------------------------------===//

/// Return all static and dynamic kernel_size as OpFoldResults.
SmallVector<OpFoldResult> Im2colOp::getMixedKernelSize() {
  // Static sizes become IndexAttrs; dynamic ones map to the variadic
  // `kernel_size` operands in order.
  OpBuilder builder(getContext());
  return ::mlir::getMixedValues(getStaticKernelSize(), getKernelSize(),
                                builder);
}

/// Return all static and dynamic k_offset as OpFoldResults.
SmallVector<OpFoldResult> Im2colOp::getMixedKOffset() {
  // Static offsets become IndexAttrs; dynamic ones map to the variadic
  // `k_offset` operands in order.
  OpBuilder builder(getContext());
  return ::mlir::getMixedValues(getStaticKOffset(), getKOffset(), builder);
}

/// Return all static and dynamic m_offset as OpFoldResults.
SmallVector<OpFoldResult> Im2colOp::getMixedMOffset() {
  return LinalgExt::getMixedValues(getContext(), getStaticMOffset(),
                                   getMOffset());
}

/// Replace the op's k_offset with `kOffset`, splitting the mixed list into
/// its attribute (static) and SSA-operand (dynamic) parts.
void Im2colOp::setMixedKOffset(SmallVector<OpFoldResult> kOffset) {
  SmallVector<int64_t> staticVals;
  SmallVector<Value> dynamicVals;
  dispatchIndexOpFoldResults(kOffset, dynamicVals, staticVals);
  // Write both halves back so the static attr and dynamic operands stay in
  // sync.
  setStaticKOffset(staticVals);
  getKOffsetMutable().assign(dynamicVals);
}

/// Replace the op's m_offset with `mOffset`, splitting the mixed list into
/// its attribute (static) and SSA-operand (dynamic) parts.
void Im2colOp::setMixedMOffset(SmallVector<OpFoldResult> mOffset) {
  SmallVector<int64_t> staticVals;
  SmallVector<Value> dynamicVals;
  dispatchIndexOpFoldResults(mOffset, dynamicVals, staticVals);
  // Write both halves back so the static attr and dynamic operands stay in
  // sync.
  setStaticMOffset(staticVals);
  getMOffsetMutable().assign(dynamicVals);
}

/// Custom builder methods for im2col op.
///
/// Splits each mixed static/dynamic list (`kernelSize`, `kOffset`, `mOffset`)
/// into a static `DenseI64ArrayAttr` plus the corresponding variadic dynamic
/// operands, infers the result type from the output operand, and forwards to
/// the ODS-generated builder.
void Im2colOp::build(OpBuilder &builder, OperationState &state, Value input,
                     Value output, ArrayRef<int64_t> strides,
                     ArrayRef<int64_t> dilations,
                     ArrayRef<OpFoldResult> kernelSize,
                     ArrayRef<OpFoldResult> kOffset,
                     ArrayRef<OpFoldResult> mOffset, ArrayRef<int64_t> batchPos,
                     ArrayRef<int64_t> mPos, ArrayRef<int64_t> kPos) {
  // All per-kernel-dimension lists must agree in rank (one entry per spatial
  // kernel dim).
  assert(strides.size() == kernelSize.size() &&
         dilations.size() == kernelSize.size() &&
         mPos.size() == kernelSize.size() &&
         "strides, dilations, m_pos, and kernel expected to be the same rank");
  // Partition each mixed list into its static (attr) and dynamic (operand)
  // halves.
  SmallVector<int64_t> staticKernelSize, staticMOffset, staticKOffset;
  SmallVector<Value> dynamicKernelSize, dynamicMOffset, dynamicKOffset;
  dispatchIndexOpFoldResults(kernelSize, dynamicKernelSize, staticKernelSize);
  dispatchIndexOpFoldResults(mOffset, dynamicMOffset, staticMOffset);
  dispatchIndexOpFoldResults(kOffset, dynamicKOffset, staticKOffset);
  // Only tensor outputs yield an op result; other shaped outputs (e.g.
  // memrefs) are updated in place and produce no results.
  SmallVector<Type> resultType;
  auto outputType = output.getType();
  if (isa<RankedTensorType>(outputType)) {
    resultType.push_back(outputType);
  }
  // NOTE: operand/attribute order here must match the argument order declared
  // in the ODS definition of the op.
  build(builder, state, resultType, input, output,
        builder.getDenseI64ArrayAttr(strides),
        builder.getDenseI64ArrayAttr(dilations), dynamicKernelSize,
        builder.getDenseI64ArrayAttr(staticKernelSize), dynamicKOffset,
        builder.getDenseI64ArrayAttr(staticKOffset), dynamicMOffset,
        builder.getDenseI64ArrayAttr(staticMOffset),
        builder.getDenseI64ArrayAttr(batchPos),
        builder.getDenseI64ArrayAttr(mPos), builder.getDenseI64ArrayAttr(kPos));
}

LogicalResult Im2colOp::verify() {
  Operation *op = getOperation();
  // Exactly one shaped input and one output are supported.
  if (llvm::count_if(getDpsInputs(), [](Value v) {
        return isa<ShapedType>(v.getType());
      }) != 1) {
    return op->emitOpError("expected only one ShapedType operand");
  }
  if (getNumDpsInits() != 1) {
    return op->emitOpError("expected one output operand");
  }

  // TODO(Max191): Support cases with more than 1 m or k dimension, and remove
  // the check for a single m_offset and k_offset.
  if (getMixedMOffset().size() != 1) {
    return op->emitOpError("expected one m_offset");
  }
  if (getMixedKOffset().size() != 1) {
    return op->emitOpError("expected one k_offset");
  }
  auto inputType = getInputType();
  unsigned inputRank = inputType.getRank();
  ArrayRef<int64_t> batchPos = getBatchPos();
  ArrayRef<int64_t> mPos = getMPos();
  ArrayRef<int64_t> kPos = getKPos();
  if (inputRank != batchPos.size() + mPos.size() + kPos.size()) {
    return op->emitOpError(
        "expected input rank to be the sum of batch, m, and k ranks");
  }
  // Every position attribute indexes into the input shape below, so validate
  // the range first to avoid out-of-bounds access in the verifier itself.
  for (int64_t pos : llvm::concat<const int64_t>(batchPos, mPos, kPos)) {
    if (pos < 0 || pos >= static_cast<int64_t>(inputRank)) {
      return op->emitOpError(
          "expected batch, m, and k positions to be within the input rank");
    }
  }
  ArrayRef<int64_t> strides = getStrides();
  ArrayRef<int64_t> dilations = getDilations();
  SmallVector<OpFoldResult> kernelSize = getMixedKernelSize();
  if (kernelSize.size() != mPos.size()) {
    return op->emitOpError(
        "expected kernel rank to be equal to the m_pos rank");
  }
  if (strides.size() != kernelSize.size()) {
    return op->emitOpError(
        "expected strides rank to be equal to the kernel rank");
  }
  if (dilations.size() != kernelSize.size()) {
    return op->emitOpError(
        "expected dilations rank to be equal to the kernel rank");
  }

  // The output is expected to be (batch..., M, K): one dim per batch_pos
  // entry plus single M and K dims. Check the rank up front so the indexing
  // below cannot go out of bounds for low-rank outputs.
  ArrayRef<int64_t> outputShape = getOutputType().getShape();
  if (outputShape.size() != batchPos.size() + 2) {
    return op->emitOpError(
        "expected output rank to be the batch rank plus 2 (m and k dims)");
  }
  ArrayRef<int64_t> inputShape = inputType.getShape();
  SmallVector<int64_t> expectedOutputShape;
  for (auto pos : batchPos) {
    expectedOutputShape.push_back(inputShape[pos]);
  }
  // When the op is tiled, the m and k dimensions of the output are tiled, but
  // they are not tiled in the input, so we cannot verify the output size of
  // these dimensions. Take them as-is from the output shape.
  expectedOutputShape.push_back(outputShape[outputShape.size() - 2]);
  expectedOutputShape.push_back(outputShape.back());
  if (failed(verifyCompatibleShape(expectedOutputShape, outputShape))) {
    return op->emitOpError("incompatible output shape");
  }
  return success();
}

// Fold away `memref.cast` producers of the op's operands where possible.
LogicalResult Im2colOp::fold(FoldAdaptor, SmallVectorImpl<OpFoldResult> &) {
  return memref::foldMemRefCast(*this);
}

// Compute the result shapes by delegating to the shared LinalgExt interface
// implementation.
LogicalResult
Im2colOp::reifyResultShapes(OpBuilder &b,
                            ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
  auto linalgExtOp = cast<LinalgExtOp>(getOperation());
  return linalgExtOp.reifyResultShapes(b, reifiedReturnShapes);
}

#define DEFINE_OP_GET_EFFECTS(OP_NAME) \
void OP_NAME::getEffects( \
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>> \
Expand All @@ -1522,6 +1668,7 @@ DEFINE_OP_GET_EFFECTS(WinogradFilterTransformOp)
DEFINE_OP_GET_EFFECTS(WinogradOutputTransformOp)
DEFINE_OP_GET_EFFECTS(AttentionOp)
DEFINE_OP_GET_EFFECTS(OnlineAttentionOp)
DEFINE_OP_GET_EFFECTS(Im2colOp)

} // namespace mlir::iree_compiler::IREE::LinalgExt

Expand Down
128 changes: 128 additions & 0 deletions compiler/src/iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,134 @@ def IREELinalgExt_OnlineAttentionOp : IREELinalgExt_PureOp<"online_attention",
}
}];
}
//===----------------------------------------------------------------------===//
// Im2col
//===----------------------------------------------------------------------===//

def IREELinalgExt_Im2colOp : IREELinalgExt_Op<"im2col",
    [DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
  let summary = "Im2col operation for convolutions";
  let description = [{
    Im2col op for convolutions. The operation performs a transformation on the
    input to convert it from a convolution input to an equivalent gemm input.
    The op is defined by its input, output, some conv metadata, and some
    indexing metadata. The `strides`, `dilations`, and `kernel_size` are taken
    from the convolution from which this op is generated, and they define how
    the input operand is indexed when the operation is decomposed. The shape of
    the output should be `tensor<BxMxK>`, and the `m_pos`, `k_pos`, and
    `batch_pos` indicate which input dimensions map to which output dimensions.

    The `k_offset` is an offset within the output K dimension from which the
    iteration space of the operation begins. This is used for tiling, since the
    tiled implementation must leave the output K dimension untiled. Similarly,
    `m_offset` is the offset within the output M dimension from which the
    iteration space of the operation begins.
    The iteration space is the full output shape of the im2col op, so if the
    im2col op were tiled to loops with a scalar inner tile, it would look like
    the following:
    ```
    %im2col = iree_linalg_ext.im2col
        strides = [1, 1] dilations = [1, 1] kernel_size = [3, 3]
        m_offset = [0] k_offset = [0]
        batch_pos = [0] m_pos = [1, 2] k_pos = [3]
        ins(%in : tensor<2x34x34x640xf32>)
        outs(%out : tensor<2x1024x5760xf32>) -> tensor<2x1024x5760xf32>
    ```
    becomes:
    ```
    scf.for %arg0 = %c0 to %c2 step %c1
      scf.for %arg1 = %c0 to %c1024 step %c1
        scf.for %arg2 = %c0 to %c5760 step %c1
          %im2col = iree_linalg_ext.im2col
              strides = [1, 1] dilations = [1, 1] kernel_size = [3, 3]
              m_offset = [%arg1] k_offset = [%arg2]
              batch_pos = [0] m_pos = [1, 2] k_pos = [3]
              ins(%in_tile : tensor<1x34x34x640xf32>)
              outs(%out_tile : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
    ```
    Then, when the tiled op is decomposed, it becomes a loop over the iteration
    space of the im2col op, with an extract_slice from the `%in_tile` followed
    by an insert_slice to the `%out_tile`. The indices for the extract slice are
    computed using the `m_offset` and `k_offset` as:
      (b, m, k) -> (b, M / 32 + K / (640*3), M % 32 + K % (640*3) / 640, K % 640)
    Where `(b, m, k)` are the indices of the tiled op's iteration space, and
    `M = m + m_offset` and `K = k + k_offset`.
  }];

  let arguments = (ins AnyShaped:$input, AnyShaped:$output,
    DenseI64ArrayAttr:$strides,
    DenseI64ArrayAttr:$dilations,
    Variadic<Index>:$kernel_size,
    DenseI64ArrayAttr:$static_kernel_size,
    Variadic<Index>:$m_offset,
    DenseI64ArrayAttr:$static_m_offset,
    Variadic<Index>:$k_offset,
    DenseI64ArrayAttr:$static_k_offset,
    DenseI64ArrayAttr:$batch_pos,
    DenseI64ArrayAttr:$m_pos,
    DenseI64ArrayAttr:$k_pos);

  let results = (outs Variadic<AnyShaped>:$results);
  let hasFolder = 1;
  let assemblyFormat = [{
    attr-dict
    `strides` `=` $strides
    `dilations` `=` $dilations
    `kernel_size` `=`
    custom<DynamicIndexList>($kernel_size, $static_kernel_size)
    `m_offset` `=`
    custom<DynamicIndexList>($m_offset, $static_m_offset)
    `k_offset` `=`
    custom<DynamicIndexList>($k_offset, $static_k_offset)
    `batch_pos` `=` $batch_pos
    `m_pos` `=` $m_pos
    `k_pos` `=` $k_pos
    `ins` `(` $input `:` type($input) `)`
    `outs` `(` $output `:` type($output) `)`
    (`->` type($results)^)?
  }];

  let builders = [
    OpBuilder<(ins "Value":$input, "Value":$output,
      "ArrayRef<int64_t>":$strides,
      "ArrayRef<int64_t>":$dilations,
      "ArrayRef<OpFoldResult>":$kernel_size,
      "ArrayRef<OpFoldResult>":$m_offset,
      "ArrayRef<OpFoldResult>":$k_offset,
      "ArrayRef<int64_t>":$batch_dimensions,
      "ArrayRef<int64_t>":$m_dimensions,
      "ArrayRef<int64_t>":$k_dimensions)>
  ];

  let extraClassDeclaration = extraLinalgExtOpClassDeclaration # [{
    ShapedType getInputType() {
      return cast<ShapedType>(getInput().getType());
    }
    ShapedType getOutputType() {
      return cast<ShapedType>(getOutput().getType());
    }
    int64_t getInputRank() {
      return getInputType().getRank();
    }
    int64_t getOutputRank() {
      return getOutputType().getRank();
    }
    // Return op metadata.
    SmallVector<OpFoldResult> getMixedKernelSize();
    SmallVector<OpFoldResult> getMixedMOffset();
    SmallVector<OpFoldResult> getMixedKOffset();

    // Set op metadata.
    void setMixedKOffset(SmallVector<OpFoldResult> kOffset);
    void setMixedMOffset(SmallVector<OpFoldResult> mOffset);

    // Method to implement for specifying output range for
    // DestinationStyleOpInterface
    MutableOperandRange getDpsInitsMutable() {
      return getOutputMutable();
    }
  }];
}

} // OpGroupNonStructuredOps

Expand Down
Loading

0 comments on commit 1f69b85

Please sign in to comment.