Enable support for LTC Input/Output Mapping (#764)
* Save InputOutputAliases to TorchMlirComputation

* Implement GetResultShape for TorchMlirLoweringContext

* Use optional return type for GetResultShape

* Remove support for aten::detach

With this op enabled, tensors were being copied, which resulted in incorrect aliasing.

* Add newline before printing I/O alias mapping

* Changed printout to use "Input param" as label instead of "Input"

* Remove shape inference function for aten::detach

* Moved implementation of SetUpAlias to MlirLoweringContext

As part of this change, TorchMlirComputation has been moved to the end of mlir_lowering_context.h so that it can access some new structs defined in TorchMlirLoweringContext.

* Use updated PyTorch API

* Remove GetResultShape

Complements this upstream PyTorch PR: pytorch/pytorch#75828

This PR adds support for mapping input and output tensors that alias each other (e.g., mapping an input weight parameter to the same tensor in the output after a training iteration).

MLIR: 
func @graph(%arg0: !torch.vtensor<[1,5],f32>, %arg1: !torch.vtensor<[1],si64>, ..., %arg6: !torch.vtensor<[10,5],f32>, %arg7: !torch.vtensor<[10],f32>, ...) {
  ...
  return %arg0, %arg1, %17, %23, ... : !torch.vtensor<[1,5],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[10,5],f32>, !torch.vtensor<[10],f32>, ...
}

Input/Output Alias Mapping: 
Output: 0 -> Input: 0
Output: 1 -> Input: 1
Output: 2 -> Input: 6
Output: 3 -> Input: 7
The aten::detach op has also been disabled in this PR because it caused tensors to be copied rather than aliased.
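
For illustration, here is a minimal, self-contained C++ sketch of how these alias records are represented and printed. The struct mirrors TorchMlirLoweringContext::InputOutputAlias from the diff below; the main function and the sample values (which reproduce the mapping above, using the final "Input param" label) are standalone stand-ins rather than backend code.

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors TorchMlirLoweringContext::InputOutputAlias from this PR.
struct InputOutputAlias {
  std::vector<int64_t> output_index; // index of the aliased buffer in the result tuple
  int64_t param_number;              // parameter containing the buffer to be aliased
  std::vector<int64_t> param_index;  // index of the aliased buffer within the parameter
  bool must_alias;                   // must-alias vs. may-alias
};

int main() {
  // Sample records reproducing the mapping shown above.
  std::vector<InputOutputAlias> aliases = {
      {{0}, 0, {}, false},
      {{1}, 1, {}, false},
      {{2}, 6, {}, false},
      {{3}, 7, {}, false},
  };

  // Same printout shape as TorchMlirComputation::to_string(); only the
  // leading tuple index is printed, as in the example output above.
  std::cout << "Input/Output Alias Mapping: \n";
  for (const InputOutputAlias& alias : aliases) {
    std::cout << "Output: " << alias.output_index[0]
              << " -> Input param: " << alias.param_number << "\n";
  }
  return 0;
}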
henrytwo committed Jul 29, 2022
1 parent 05f3441 commit 8627bed
Showing 6 changed files with 161 additions and 101 deletions.
1 change: 0 additions & 1 deletion build_tools/autogen_ltc_backend.py
@@ -232,7 +232,6 @@ def extract_signatures(path):
shape_inference_defs = extract_signatures(
backend_path.joinpath("LazyShapeInference.cpp")
)
assert len(shape_inference_defs) > 0
assert len(shape_inference_decls) > len(shape_inference_defs)

missing_defs = (
1 change: 1 addition & 0 deletions build_tools/autogen_ltc_backend.yaml
@@ -19,6 +19,7 @@ blacklist:
- zeros_like # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)

# Additional ops which autogen is supported for but don't compile yet
- detach
- item
- size
- where
4 changes: 0 additions & 4 deletions python/torch_mlir/csrc/base_lazy_backend/LazyShapeInference.cpp
@@ -13,9 +13,5 @@
namespace torch {
namespace lazy {

std::vector<Shape> compute_shape_detach(const at::Tensor& self) {
return {Shape(self.scalar_type(), self.sizes().vec())};
}

} // namespace lazy
} // namespace torch
1 change: 0 additions & 1 deletion python/torch_mlir/csrc/base_lazy_backend/LazyShapeInference.h
@@ -38,7 +38,6 @@ TORCH_API std::vector<Shape> compute_shape_broadcast_to(const at::Tensor & self,
TORCH_API std::vector<Shape> compute_shape_bucketize(const at::Tensor & self, const at::Tensor & boundaries, bool out_int32, bool right);
TORCH_API std::vector<Shape> compute_shape_constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value);
TORCH_API std::vector<Shape> compute_shape_conv2d(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups);
TORCH_API std::vector<Shape> compute_shape_detach(const at::Tensor & self);
TORCH_API std::vector<Shape> compute_shape_div(const at::Tensor & self, const at::Scalar & other);
TORCH_API std::vector<Shape> compute_shape_div_(at::Tensor & self, const at::Scalar & other);
TORCH_API std::vector<Shape> compute_shape_dropout(const at::Tensor & input, double p, bool train);
162 changes: 100 additions & 62 deletions python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.cpp
@@ -27,63 +27,6 @@
namespace torch {
namespace lazy {

///////////////////////////////////////////////////////////////////////////////
// TorchMlir Computation
///////////////////////////////////////////////////////////////////////////////

TorchMlirComputation::TorchMlirComputation(
MlirOperation func_op, MlirContext mlir_context,
const std::shared_ptr<torch::jit::Graph>& graph)
: func_op_(std::move(func_op)), mlir_context_(std::move(mlir_context)),
graph_(graph), num_results_(graph_->outputs().size()) {
for (torch::jit::Value* input : graph_->inputs()) {
parameter_names_.push_back(input->debugName());
}
}

int TorchMlirComputation::parameters_size() const {
return parameter_names_.size();
}

const std::vector<torch::lazy::Shape>&
TorchMlirComputation::parameter_shapes() const {
throw std::runtime_error(
"todo(whc) implement ts computation shapes or change interface");
return parameter_shapes_;
}

const std::vector<std::string>& TorchMlirComputation::parameter_names() const {
return parameter_names_;
}

const torch::lazy::Shape& TorchMlirComputation::result_shape() const {
throw std::runtime_error(
"todo(whc) implement ts computation shapes or change interface");
return result_shape_;
}

unsigned TorchMlirComputation::num_results() const { return num_results_; }

MlirOperation TorchMlirComputation::func_op() const { return func_op_; }

std::string TorchMlirComputation::to_string() const {
// Since we use the C-MLIR API, we need to use a callback to print.
MlirStringCallback print_callback = [](MlirStringRef part, void* user_data) {
// user_data is a void ptr to some data structure of our choice -- in this
// case, the string stream where we'll be accumulating the strings.
std::stringstream* ss_ptr = static_cast<std::stringstream*>(user_data);
*ss_ptr << std::string(part.data, part.length);
};

std::stringstream ss;
ss << "JIT Graph: \n"
<< graph_->toString() << "\n\n"
<< "MLIR: \n";
mlirOperationPrint(func_op_, print_callback, &ss);

return ss.str();
}

///////////////////////////////////////////////////////////////////////////////
// TorchMlir Lowering Context
///////////////////////////////////////////////////////////////////////////////
@@ -115,10 +58,34 @@ TorchMlirLoweringContext::TorchMlirLoweringContext(
RegisterMlirDialects();
}

// Get the shape of the result tuple component, given by index.
torch::lazy::Shape
TorchMlirLoweringContext::GetResultShape(size_t index) const {
UNIMPLEMENTED_FUNCTION_ERROR();
void TorchMlirLoweringContext::SetUpAlias(
const std::vector<int64_t>& output_index, int64_t param_number,
const std::vector<int64_t>& param_index, bool must_alias) {
input_output_aliases_.push_back(
{output_index, param_number, param_index, must_alias});
}

bool TorchMlirLoweringContext::CheckResultShape(
const BackendDataPtr& parameter_data, size_t result_idx) {
TORCH_CHECK(
result_idx < root_tuple_.size(), "Tried getting result shape at index ",
result_idx, " which is out of bounds!");

torch::jit::Value* output = root_tuple_[result_idx];

if (c10::TensorTypePtr tensor_type =
output->type()->cast<c10::TensorType>()) {
auto scalar_type = tensor_type->scalarType();
auto sizes = tensor_type->sizes().concrete_sizes();

// Not guaranteed to have concrete size, so we need to check it exists.
if (scalar_type && sizes) {
return Shape(parameter_data->shape()) ==
Shape(scalar_type.value(), c10::ArrayRef<int64_t>(sizes.value()));
}
}

return false;
}

size_t TorchMlirLoweringContext::AddResult(const Output& output) {
@@ -153,7 +120,8 @@ ComputationPtr TorchMlirLoweringContext::Build() {
/*getArgAttribute=*/[](int) -> MlirAttribute { return {nullptr}; },
/*importOptions=*/{/*assumeTensorsHaveValueSemantics=*/true});

return std::make_shared<TorchMlirComputation>(func_op, mlir_context_, graph_);
return std::make_shared<TorchMlirComputation>(
func_op, mlir_context_, graph_, input_output_aliases_);
}

torch::jit::Value* TorchMlirLoweringContext::GetOutputOp(const Output& output) {
@@ -292,5 +260,75 @@ void TorchMlirLoweringContext::RegisterMlirDialects() {
torchMlirRegisterAllDialects(mlir_context_);
}

///////////////////////////////////////////////////////////////////////////////
// TorchMlir Computation
///////////////////////////////////////////////////////////////////////////////

TorchMlirComputation::TorchMlirComputation(
MlirOperation func_op, MlirContext mlir_context,
const std::shared_ptr<torch::jit::Graph>& graph,
InputOutputAliases input_output_aliases)
: func_op_(std::move(func_op)), mlir_context_(std::move(mlir_context)),
graph_(graph), input_output_aliases_(input_output_aliases),
num_results_(graph_->outputs().size()) {
for (torch::jit::Value* input : graph_->inputs()) {
parameter_names_.push_back(input->debugName());
}
}

int TorchMlirComputation::parameters_size() const {
return parameter_names_.size();
}

const std::vector<torch::lazy::Shape>&
TorchMlirComputation::parameter_shapes() const {
throw std::runtime_error(
"todo(whc) implement ts computation shapes or change interface");
return parameter_shapes_;
}

const std::vector<std::string>& TorchMlirComputation::parameter_names() const {
return parameter_names_;
}

const torch::lazy::Shape& TorchMlirComputation::result_shape() const {
throw std::runtime_error(
"todo(whc) implement ts computation shapes or change interface");
return result_shape_;
}

unsigned TorchMlirComputation::num_results() const { return num_results_; }

MlirOperation TorchMlirComputation::func_op() const { return func_op_; }

std::string TorchMlirComputation::to_string() const {
// Since we use the C-MLIR API, we need to use a callback to print.
MlirStringCallback print_callback = [](MlirStringRef part, void* user_data) {
// user_data is a void ptr to some data structure of our choice -- in this
// case, the string stream where we'll be accumulating the strings.
std::stringstream* ss_ptr = static_cast<std::stringstream*>(user_data);
*ss_ptr << std::string(part.data, part.length);
};

std::stringstream ss;

// JIT Graph
ss << "JIT Graph: \n" << graph_->toString() << "\n\n";

// MLIR
ss << "MLIR: \n";
mlirOperationPrint(func_op_, print_callback, &ss);
ss << "\n";

// Input/Output Mapping
ss << "Input/Output Alias Mapping: \n";
for (InputOutputAlias input_output_alias : input_output_aliases_) {
ss << "Output: " << input_output_alias.output_index
<< " -> Input param: " << input_output_alias.param_number << std::endl;
}

return ss.str();
}

} // namespace lazy
} // namespace torch
93 changes: 60 additions & 33 deletions python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.h
@@ -39,48 +39,37 @@ class TORCH_API TorchMlirNodeLoweringInterface {
Create(LoweringContext* loctx);
};

class TORCH_API TorchMlirComputation : public torch::lazy::Computation {
public:
TorchMlirComputation(
MlirOperation func_op, MlirContext mlir_context,
const std::shared_ptr<torch::jit::Graph>& graph);

int parameters_size() const override;

const std::vector<torch::lazy::Shape>& parameter_shapes() const override;

const std::vector<std::string>& parameter_names() const override;

const torch::lazy::Shape& result_shape() const override;

unsigned num_results() const;

MlirOperation func_op() const;

std::string to_string() const;

private:
std::vector<std::string> parameter_names_;
std::vector<Shape> parameter_shapes_;
Shape result_shape_;

MlirOperation func_op_;
MlirContext mlir_context_;
std::shared_ptr<torch::jit::Graph> graph_;
unsigned num_results_;
};

class TORCH_API TorchMlirLoweringContext : public torch::lazy::LoweringContext {
public:
// Describes an input/output alias as inserted by the SetUpAlias() API.
struct InputOutputAlias {
// Specifies the index of the aliased buffer in the result tuple.
std::vector<int64_t> output_index;
// Specifies the parameter containing the buffer to be aliased.
int64_t param_number;
// Specifies the index of the aliased buffer in the parameter
std::vector<int64_t> param_index;
// Specifies if the alias is a must alias or may alias.
bool must_alias;
};
using InputOutputAliases = std::vector<InputOutputAlias>;

TorchMlirLoweringContext(
const std::string& name, torch::lazy::BackendDevice device);
TorchMlirLoweringContext(
const std::string& name, torch::lazy::BackendDevice device,
c10::ArrayRef<torch::lazy::Node*> post_order,
torch::lazy::Util::EmissionMap emit_status);

// Get the shape of the result tuple component, given by index.
torch::lazy::Shape GetResultShape(size_t index) const override;
// Adds a new input/output alias.
void SetUpAlias(
const std::vector<int64_t>& output_index, int64_t param_number,
const std::vector<int64_t>& param_index,
bool must_alias = false) override;

// Check if parameter shape matches result at index.
bool CheckResultShape(
const BackendDataPtr& parameter_data, size_t result_idx) override;

// Adds the given output as a component of the result tuple and returns its
// assigned position within the tuple.
@@ -128,6 +117,8 @@ class TORCH_API TorchMlirLoweringContext : public torch::lazy::LoweringContext {

void RegisterMlirDialects();

// Holds the input/output alias information populated by the SetUpAlias() API.
InputOutputAliases input_output_aliases_;
std::shared_ptr<torch::jit::Graph> graph_;
MlirContext mlir_context_;
std::unordered_map<BackendData::Handle, Parameter> parameters_map_;
@@ -136,5 +127,41 @@ class TORCH_API TorchMlirLoweringContext : public torch::lazy::LoweringContext {
std::unique_ptr<TorchMlirNodeLoweringInterface> lowering_;
};

class TORCH_API TorchMlirComputation : public torch::lazy::Computation {
public:
using InputOutputAliases = TorchMlirLoweringContext::InputOutputAliases;
using InputOutputAlias = TorchMlirLoweringContext::InputOutputAlias;

TorchMlirComputation(
MlirOperation func_op, MlirContext mlir_context,
const std::shared_ptr<torch::jit::Graph>& graph,
InputOutputAliases input_output_aliases);

int parameters_size() const override;

const std::vector<torch::lazy::Shape>& parameter_shapes() const override;

const std::vector<std::string>& parameter_names() const override;

const torch::lazy::Shape& result_shape() const override;

unsigned num_results() const;

MlirOperation func_op() const;

std::string to_string() const;

private:
std::vector<std::string> parameter_names_;
std::vector<Shape> parameter_shapes_;
Shape result_shape_;

MlirOperation func_op_;
MlirContext mlir_context_;
std::shared_ptr<torch::jit::Graph> graph_;
InputOutputAliases input_output_aliases_;
unsigned num_results_;
};

} // namespace lazy
} // namespace torch
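
As a usage note, a backend would record an alias through the new SetUpAlias() override declared above. The sketch below is hypothetical (the context name, device variable, and index values are made-up, mirroring the mapping in the commit message), not code from this PR:

// Hypothetical call site; assumes `device` is a valid torch::lazy::BackendDevice.
torch::lazy::TorchMlirLoweringContext ctx("SyncTensorsGraph", device);
// Declare that element 2 of the result tuple may reuse the buffer of input
// parameter 6; param_index is empty because the parameter is a plain tensor,
// not a tuple.
ctx.SetUpAlias(/*output_index=*/{2}, /*param_number=*/6,
               /*param_index=*/{}, /*must_alias=*/false);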
