[Codegen][GPU] NFC: Move SPIRVCreateFastSlowPath to Common/GPU (#16669)

This allows for reuse of this pass with other backends.
iree-org · Mar 5, 2024 · 7782a41
1 parent 94f64fa
commit 7782a41
Show file tree

Hide file tree

Showing 16 changed files with 32 additions and 27 deletions.
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel
@@ -51,6 +51,7 @@ iree_compiler_cc_library(
     srcs = [
         "AMDGPUDistributeContract.cpp",
         "GPUCheckResourceUsage.cpp",
+        "GPUCreateFastSlowPath.cpp",
         "GPUDistribute.cpp",
         "GPUDistributeSharedMemoryCopy.cpp",
         "GPUDistributionPatterns.cpp",
@@ -93,7 +94,9 @@ iree_compiler_cc_library(
         "@llvm-project//mlir:AffineDialect",
         "@llvm-project//mlir:AffineTransforms",
         "@llvm-project//mlir:AffineUtils",
+        "@llvm-project//mlir:Analysis",
         "@llvm-project//mlir:ArithDialect",
+        "@llvm-project//mlir:ArithUtils",
         "@llvm-project//mlir:BufferizationDialect",
         "@llvm-project//mlir:DestinationStyleOpInterface",
         "@llvm-project//mlir:DialectUtils",
@@ -118,6 +121,7 @@ iree_compiler_cc_library(
         "@llvm-project//mlir:SideEffectInterfaces",
         "@llvm-project//mlir:Support",
         "@llvm-project//mlir:TensorDialect",
+        "@llvm-project//mlir:TensorTransforms",
         "@llvm-project//mlir:Transforms",
         "@llvm-project//mlir:VectorDialect",
         "@llvm-project//mlir:VectorToSCF",

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt
@@ -49,6 +49,7 @@ iree_cc_library(
   SRCS
     "AMDGPUDistributeContract.cpp"
     "GPUCheckResourceUsage.cpp"
+    "GPUCreateFastSlowPath.cpp"
     "GPUDistribute.cpp"
     "GPUDistributeSharedMemoryCopy.cpp"
     "GPUDistributionPatterns.cpp"
@@ -78,7 +79,9 @@ iree_cc_library(
     MLIRAffineDialect
     MLIRAffineTransforms
     MLIRAffineUtils
+    MLIRAnalysis
     MLIRArithDialect
+    MLIRArithUtils
     MLIRBufferizationDialect
     MLIRDestinationStyleOpInterface
     MLIRFuncDialect
@@ -102,6 +105,7 @@ iree_cc_library(
     MLIRSideEffectInterfaces
     MLIRSupport
     MLIRTensorDialect
+    MLIRTensorTransforms
     MLIRTransforms
     MLIRVectorDialect
     MLIRVectorToSCF

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVCreateFastSlowPath.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCreateFastSlowPath.cpp
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "iree/compiler/Codegen/SPIRV/PassDetail.h"
-#include "iree/compiler/Codegen/SPIRV/Passes.h"
+#include "iree/compiler/Codegen/Common/GPU/PassDetail.h"
+#include "iree/compiler/Codegen/Common/GPU/Passes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Analysis/SliceAnalysis.h"
@@ -129,8 +129,8 @@ static void applyFastSlowPathConversion(mlir::FunctionOpInterface funcOp) {
 
 namespace {
 
-struct SPIRVCreateFastSlowPathPass final
-    : public SPIRVCreateFastSlowPathBase<SPIRVCreateFastSlowPathPass> {
+struct GPUCreateFastSlowPathPass final
+    : public GPUCreateFastSlowPathBase<GPUCreateFastSlowPathPass> {
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<scf::SCFDialect>();
   }
@@ -155,8 +155,8 @@ struct SPIRVCreateFastSlowPathPass final
 } // namespace
 
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
-createSPIRVCreateFastSlowPathPass() {
-  return std::make_unique<SPIRVCreateFastSlowPathPass>();
+createGPUCreateFastSlowPathPass() {
+  return std::make_unique<GPUCreateFastSlowPathPass>();
 }
 
 } // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -77,6 +77,13 @@ std::unique_ptr<OperationPass<ModuleOp>> createGPUCheckResourceUsagePass(
     std::function<unsigned(mlir::FunctionOpInterface)> getIndexBitwidth =
         nullptr);
 
+// Uses `tensor.pad` ops as anchors to create separate fast and slow paths
+// inside the kernel. The fast path is for inner tiles where we don't need
+// padding, while the slow path is for boundary tiles where we do need
+// padding.
+std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
+createGPUCreateFastSlowPathPass();
+
 /// Creates a pass to distribute scf.forall ops to GPU processors.
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>> createGPUDistribute();
 

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
@@ -19,6 +19,12 @@ def GPUCheckResourceUsage :
   let constructor = "mlir::iree_compiler::createGPUCheckResourceUsagePass()";
 }
 
+def GPUCreateFastSlowPath :
+    InterfacePass<"iree-codegen-gpu-create-fast-slow-path", "mlir::FunctionOpInterface"> {
+  let summary = "Create separate fast and slow paths to handle padding";
+  let constructor = "mlir::iree_compiler::createGPUCreateFastSlowPathPass()";
+}
+
 def GPUDistribute :
     InterfacePass<"iree-codegen-gpu-distribute", "mlir::FunctionOpInterface"> {
   let summary = "Pass to distribute scf.forall ops.";

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
@@ -19,6 +19,7 @@ iree_lit_test_suite(
     srcs = enforce_glob(
         [
             "gpu_check_resource_usage.mlir",
+            "gpu_create_fast_slow_path.mlir",
             "gpu_distribute.mlir",
             "gpu_distribute_shared_memory.mlir",
             "gpu_generalize_named_ops.mlir",

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
@@ -15,6 +15,7 @@ iree_lit_test_suite(
     lit
   SRCS
     "gpu_check_resource_usage.mlir"
+    "gpu_create_fast_slow_path.mlir"
     "gpu_distribute.mlir"
     "gpu_distribute_shared_memory.mlir"
     "gpu_generalize_named_ops.mlir"

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/create_fast_slow_path.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s
 
 func.func @padded_conv() {
   %cst = arith.constant 0.000000e+00 : f32

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/BUILD.bazel b/compiler/src/iree/compiler/Codegen/SPIRV/BUILD.bazel
@@ -57,7 +57,6 @@ iree_compiler_cc_library(
         "Passes.cpp",
         "SPIRVAnnotateWinogradLoops.cpp",
         "SPIRVBreakDownLargeVector.cpp",
-        "SPIRVCreateFastSlowPath.cpp",
         "SPIRVDistribute.cpp",
         "SPIRVEmulateI64.cpp",
         "SPIRVEraseStorageBufferStaticShape.cpp",

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/SPIRV/CMakeLists.txt
@@ -56,7 +56,6 @@ iree_cc_library(
     "Passes.cpp"
     "SPIRVAnnotateWinogradLoops.cpp"
     "SPIRVBreakDownLargeVector.cpp"
-    "SPIRVCreateFastSlowPath.cpp"
     "SPIRVDistribute.cpp"
     "SPIRVEmulateI64.cpp"
     "SPIRVEraseStorageBufferStaticShape.cpp"

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
@@ -328,8 +328,7 @@ void addSPIRVBaseVectorizePassPipeline(OpPassManager &pm) {
   nestedModulePM.addPass(createCSEPass());
 
   // Tile to GPU invocations and vectorize.
-  nestedModulePM.addNestedPass<func::FuncOp>(
-      createSPIRVCreateFastSlowPathPass());
+  nestedModulePM.addNestedPass<func::FuncOp>(createGPUCreateFastSlowPathPass());
   nestedModulePM.addNestedPass<func::FuncOp>(createSPIRVTilePass());
   nestedModulePM.addPass(createCanonicalizerPass());
   nestedModulePM.addPass(createCSEPass());

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.h b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.h
@@ -86,13 +86,6 @@ createSPIRVAnnotateWinogradLoopsPass();
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createSPIRVBreakDownLargeVectorPass();
 
-// Uses `tensor.pad` ops as anchors to create separate fast and slow paths
-// inside the kernel. The fast path is for inner tiles where we don't need
-// padding, while the slow path is for boundary tiles where we do need
-// padding.
-std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
-createSPIRVCreateFastSlowPathPass();
-
 /// Pass to distribute tiled loop nests to invocations.
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createSPIRVDistributePass();

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.td b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.td
@@ -35,12 +35,6 @@ def SPIRVBreakDownLargeVector : InterfacePass<"iree-spirv-breakdown-large-vector
   let constructor = "mlir::iree_compiler::createSPIRVBreakDownLargeVectorPass()";
 }
 
-def SPIRVCreateFastSlowPath :
-    InterfacePass<"iree-spirv-create-fast-slow-path", "mlir::FunctionOpInterface"> {
-  let summary = "Create separate fast and slow paths to handle padding";
-  let constructor = "mlir::iree_compiler::createSPIRVCreateFastSlowPathPass()";
-}
-
 def SPIRVDistribute : InterfacePass<"iree-spirv-distribute", "mlir::FunctionOpInterface"> {
   let summary = "Distribute tiled loop nests to invocations";
   let constructor = "mlir::iree_compiler::createSPIRVDistributePass()";

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel
@@ -39,7 +39,6 @@ iree_lit_test_suite(
             "config_nvidia_matmul_cooperative_ops.mlir",
             "config_user.mlir",
             "convert_to_spirv.mlir",
-            "create_fast_slow_path.mlir",
             "distribute_to_invocations.mlir",
             "emulate_i64.mlir",
             "erase_storage_buffer_static_shape.mlir",

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
@@ -35,7 +35,6 @@ iree_lit_test_suite(
     "config_nvidia_matmul_cooperative_ops.mlir"
     "config_user.mlir"
     "convert_to_spirv.mlir"
-    "create_fast_slow_path.mlir"
     "distribute_to_invocations.mlir"
     "emulate_i64.mlir"
     "erase_storage_buffer_static_shape.mlir"

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
@@ -1,5 +1,5 @@
 // RUN: iree-opt --split-input-file \
-// RUN:   --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-spirv-create-fast-slow-path,iree-spirv-tile,canonicalize,cse,iree-codegen-generic-vectorization,iree-spirv-initial-vector-lowering,iree-codegen-optimize-tensor-insert-extract-slices,iree-spirv-final-vector-lowering,canonicalize,cse)))))' \
+// RUN:   --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path,iree-spirv-tile,canonicalize,cse,iree-codegen-generic-vectorization,iree-spirv-initial-vector-lowering,iree-codegen-optimize-tensor-insert-extract-slices,iree-spirv-final-vector-lowering,canonicalize,cse)))))' \
 // RUN:   %s | FileCheck %s
 
 #config = #iree_codegen.lowering_config<tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>