Skip to content

Commit

Permalink
[Codegen][GPU] NFC: Move SPIRVCreateFastSlowPath to Common/GPU (#16669)
Browse files Browse the repository at this point in the history
This allows for reuse of this pass with other backends.
  • Loading branch information
qedawkins authored Mar 5, 2024
1 parent 94f64fa commit 7782a41
Show file tree
Hide file tree
Showing 16 changed files with 32 additions and 27 deletions.
4 changes: 4 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ iree_compiler_cc_library(
srcs = [
"AMDGPUDistributeContract.cpp",
"GPUCheckResourceUsage.cpp",
"GPUCreateFastSlowPath.cpp",
"GPUDistribute.cpp",
"GPUDistributeSharedMemoryCopy.cpp",
"GPUDistributionPatterns.cpp",
Expand Down Expand Up @@ -93,7 +94,9 @@ iree_compiler_cc_library(
"@llvm-project//mlir:AffineDialect",
"@llvm-project//mlir:AffineTransforms",
"@llvm-project//mlir:AffineUtils",
"@llvm-project//mlir:Analysis",
"@llvm-project//mlir:ArithDialect",
"@llvm-project//mlir:ArithUtils",
"@llvm-project//mlir:BufferizationDialect",
"@llvm-project//mlir:DestinationStyleOpInterface",
"@llvm-project//mlir:DialectUtils",
Expand All @@ -118,6 +121,7 @@ iree_compiler_cc_library(
"@llvm-project//mlir:SideEffectInterfaces",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:TensorTransforms",
"@llvm-project//mlir:Transforms",
"@llvm-project//mlir:VectorDialect",
"@llvm-project//mlir:VectorToSCF",
Expand Down
4 changes: 4 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ iree_cc_library(
SRCS
"AMDGPUDistributeContract.cpp"
"GPUCheckResourceUsage.cpp"
"GPUCreateFastSlowPath.cpp"
"GPUDistribute.cpp"
"GPUDistributeSharedMemoryCopy.cpp"
"GPUDistributionPatterns.cpp"
Expand Down Expand Up @@ -78,7 +79,9 @@ iree_cc_library(
MLIRAffineDialect
MLIRAffineTransforms
MLIRAffineUtils
MLIRAnalysis
MLIRArithDialect
MLIRArithUtils
MLIRBufferizationDialect
MLIRDestinationStyleOpInterface
MLIRFuncDialect
Expand All @@ -102,6 +105,7 @@ iree_cc_library(
MLIRSideEffectInterfaces
MLIRSupport
MLIRTensorDialect
MLIRTensorTransforms
MLIRTransforms
MLIRVectorDialect
MLIRVectorToSCF
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//

#include "iree/compiler/Codegen/SPIRV/PassDetail.h"
#include "iree/compiler/Codegen/SPIRV/Passes.h"
#include "iree/compiler/Codegen/Common/GPU/PassDetail.h"
#include "iree/compiler/Codegen/Common/GPU/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "mlir/Analysis/SliceAnalysis.h"
Expand Down Expand Up @@ -129,8 +129,8 @@ static void applyFastSlowPathConversion(mlir::FunctionOpInterface funcOp) {

namespace {

struct SPIRVCreateFastSlowPathPass final
: public SPIRVCreateFastSlowPathBase<SPIRVCreateFastSlowPathPass> {
struct GPUCreateFastSlowPathPass final
: public GPUCreateFastSlowPathBase<GPUCreateFastSlowPathPass> {
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<scf::SCFDialect>();
}
Expand All @@ -155,8 +155,8 @@ struct SPIRVCreateFastSlowPathPass final
} // namespace

std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createSPIRVCreateFastSlowPathPass() {
return std::make_unique<SPIRVCreateFastSlowPathPass>();
createGPUCreateFastSlowPathPass() {
return std::make_unique<GPUCreateFastSlowPathPass>();
}

} // namespace mlir::iree_compiler
7 changes: 7 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ std::unique_ptr<OperationPass<ModuleOp>> createGPUCheckResourceUsagePass(
std::function<unsigned(mlir::FunctionOpInterface)> getIndexBitwidth =
nullptr);

/// Creates a pass that uses `tensor.pad` ops as anchors to create separate
/// fast and slow paths inside the kernel. The fast path is for inner tiles
/// where we don't need padding, while the slow path is for boundary tiles
/// where we do need padding.
///
/// The returned pass is an `InterfacePass` anchored on ops implementing
/// `mlir::FunctionOpInterface`.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUCreateFastSlowPathPass();

/// Creates a pass to distribute scf.forall ops to GPU processors.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>> createGPUDistribute();

Expand Down
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ def GPUCheckResourceUsage :
let constructor = "mlir::iree_compiler::createGPUCheckResourceUsagePass()";
}

// Pass record for `iree-codegen-gpu-create-fast-slow-path`. It is declared as
// an InterfacePass on `mlir::FunctionOpInterface` and is instantiated through
// `mlir::iree_compiler::createGPUCreateFastSlowPathPass()`.
def GPUCreateFastSlowPath :
InterfacePass<"iree-codegen-gpu-create-fast-slow-path", "mlir::FunctionOpInterface"> {
let summary = "Create separate fast and slow paths to handle padding";
let constructor = "mlir::iree_compiler::createGPUCreateFastSlowPathPass()";
}

def GPUDistribute :
InterfacePass<"iree-codegen-gpu-distribute", "mlir::FunctionOpInterface"> {
let summary = "Pass to distribute scf.forall ops.";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ iree_lit_test_suite(
srcs = enforce_glob(
[
"gpu_check_resource_usage.mlir",
"gpu_create_fast_slow_path.mlir",
"gpu_distribute.mlir",
"gpu_distribute_shared_memory.mlir",
"gpu_generalize_named_ops.mlir",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ iree_lit_test_suite(
lit
SRCS
"gpu_check_resource_usage.mlir"
"gpu_create_fast_slow_path.mlir"
"gpu_distribute.mlir"
"gpu_distribute_shared_memory.mlir"
"gpu_generalize_named_ops.mlir"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s

func.func @padded_conv() {
%cst = arith.constant 0.000000e+00 : f32
Expand Down
1 change: 0 additions & 1 deletion compiler/src/iree/compiler/Codegen/SPIRV/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ iree_compiler_cc_library(
"Passes.cpp",
"SPIRVAnnotateWinogradLoops.cpp",
"SPIRVBreakDownLargeVector.cpp",
"SPIRVCreateFastSlowPath.cpp",
"SPIRVDistribute.cpp",
"SPIRVEmulateI64.cpp",
"SPIRVEraseStorageBufferStaticShape.cpp",
Expand Down
1 change: 0 additions & 1 deletion compiler/src/iree/compiler/Codegen/SPIRV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ iree_cc_library(
"Passes.cpp"
"SPIRVAnnotateWinogradLoops.cpp"
"SPIRVBreakDownLargeVector.cpp"
"SPIRVCreateFastSlowPath.cpp"
"SPIRVDistribute.cpp"
"SPIRVEmulateI64.cpp"
"SPIRVEraseStorageBufferStaticShape.cpp"
Expand Down
3 changes: 1 addition & 2 deletions compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,7 @@ void addSPIRVBaseVectorizePassPipeline(OpPassManager &pm) {
nestedModulePM.addPass(createCSEPass());

// Tile to GPU invocations and vectorize.
nestedModulePM.addNestedPass<func::FuncOp>(
createSPIRVCreateFastSlowPathPass());
nestedModulePM.addNestedPass<func::FuncOp>(createGPUCreateFastSlowPathPass());
nestedModulePM.addNestedPass<func::FuncOp>(createSPIRVTilePass());
nestedModulePM.addPass(createCanonicalizerPass());
nestedModulePM.addPass(createCSEPass());
Expand Down
7 changes: 0 additions & 7 deletions compiler/src/iree/compiler/Codegen/SPIRV/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,6 @@ createSPIRVAnnotateWinogradLoopsPass();
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createSPIRVBreakDownLargeVectorPass();

// Uses `tensor.pad` ops as anchors to create separate fast and slow paths
// inside the kernel. The fast path is for inner tiles where we don't need
// padding, while the slow path is for boundary tiles where we do need
// padding.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createSPIRVCreateFastSlowPathPass();

/// Pass to distribute tiled loop nests to invocations.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createSPIRVDistributePass();
Expand Down
6 changes: 0 additions & 6 deletions compiler/src/iree/compiler/Codegen/SPIRV/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ def SPIRVBreakDownLargeVector : InterfacePass<"iree-spirv-breakdown-large-vector
let constructor = "mlir::iree_compiler::createSPIRVBreakDownLargeVectorPass()";
}

def SPIRVCreateFastSlowPath :
InterfacePass<"iree-spirv-create-fast-slow-path", "mlir::FunctionOpInterface"> {
let summary = "Create separate fast and slow paths to handle padding";
let constructor = "mlir::iree_compiler::createSPIRVCreateFastSlowPathPass()";
}

def SPIRVDistribute : InterfacePass<"iree-spirv-distribute", "mlir::FunctionOpInterface"> {
let summary = "Distribute tiled loop nests to invocations";
let constructor = "mlir::iree_compiler::createSPIRVDistributePass()";
Expand Down
1 change: 0 additions & 1 deletion compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ iree_lit_test_suite(
"config_nvidia_matmul_cooperative_ops.mlir",
"config_user.mlir",
"convert_to_spirv.mlir",
"create_fast_slow_path.mlir",
"distribute_to_invocations.mlir",
"emulate_i64.mlir",
"erase_storage_buffer_static_shape.mlir",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ iree_lit_test_suite(
"config_nvidia_matmul_cooperative_ops.mlir"
"config_user.mlir"
"convert_to_spirv.mlir"
"create_fast_slow_path.mlir"
"distribute_to_invocations.mlir"
"emulate_i64.mlir"
"erase_storage_buffer_static_shape.mlir"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: iree-opt --split-input-file \
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-spirv-create-fast-slow-path,iree-spirv-tile,canonicalize,cse,iree-codegen-generic-vectorization,iree-spirv-initial-vector-lowering,iree-codegen-optimize-tensor-insert-extract-slices,iree-spirv-final-vector-lowering,canonicalize,cse)))))' \
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path,iree-spirv-tile,canonicalize,cse,iree-codegen-generic-vectorization,iree-spirv-initial-vector-lowering,iree-codegen-optimize-tensor-insert-extract-slices,iree-spirv-final-vector-lowering,canonicalize,cse)))))' \
// RUN: %s | FileCheck %s

#config = #iree_codegen.lowering_config<tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>
Expand Down

0 comments on commit 7782a41

Please sign in to comment.