diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index 2fcf9a1d7d9ba..cbdd8c608d1bc 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -313,6 +313,15 @@ void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std: (*op.mutable_inputs())[input_name] = std::move(arg); } +void AddOperationInputs(MILSpec::Operation& op, std::string_view input_name, + const std::vector<std::string_view>& value_names) { + MILSpec::Argument arg; + for (const auto& value : value_names) { + arg.mutable_arguments()->Add()->set_name(std::string(value)); + } + (*op.mutable_inputs())[input_name] = std::move(arg); +} + void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) { auto& outputs = *op.mutable_outputs(); auto& output_arg = *outputs.Add(); diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index 97fb83b6dc482..7387b712c997f 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -129,6 +129,15 @@ COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& n void AddOperationInput(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name, std::string_view value_name); +/// <summary> +/// Add a variadic input argument to a MILSpec::Operation +/// </summary> +/// <param name="op">Operation to update.</param> +/// <param name="input_name">The input name defined by the spec for the operation.</param> +/// <param name="value_names">The input value names.</param> +void AddOperationInputs(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name, + const std::vector<std::string_view>& value_names); + /// <summary> /// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg. 
/// </summary> diff --git a/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc index 34193318a0264..551d8222cc062 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc @@ -4,6 +4,7 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" @@ -18,27 +19,52 @@ class ConcatOpBuilder : public BaseOpBuilder { bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; + + bool SupportsMLProgram() const override { return true; } }; Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node); - - layer->mutable_concat()->set_sequenceconcat(false); - - for (const auto* input : node.InputDefs()) { - LOGS(logger, VERBOSE) << "input name " << input->Name(); - *layer->mutable_input()->Add() = input->Name(); +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; // NOLINT + + NodeAttrHelper helper(node); + const auto axis = helper.GetInt64("axis"); // required + const auto interleave = false; + + std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat"); + std::vector<std::string_view> input_names; + for (const auto* input : node.InputDefs()) { + input_names.emplace_back(input->Name()); + } + AddOperationInputs(*op, "values", input_names); + AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), 
"axis", *axis)); + AddOperationInput(*op, "interleave", model_builder.AddScalarConstant(op->type(), "interleave", interleave)); + AddOperationOutput(*op, *node.OutputDefs()[0]); + model_builder.AddOperation(std::move(op)); + + } else +#endif // defined(COREML_ENABLE_MLPROGRAM) + { + std::unique_ptr layer = model_builder.CreateNNLayer(node); + + layer->mutable_concat()->set_sequenceconcat(false); + + for (const auto* input : node.InputDefs()) { + LOGS(logger, VERBOSE) << "input name " << input->Name(); + *layer->mutable_input()->Add() = input->Name(); + } + + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + + model_builder.AddLayer(std::move(layer)); } - - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); - - model_builder.AddLayer(std::move(layer)); return Status::OK(); } -bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */, +bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); if (input_defs.size() < 2) { @@ -50,23 +76,25 @@ bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPa if (!GetShape(*input_defs[0], input_shape, logger)) return false; - auto rank = input_shape.size(); - if (rank != 4) { - // For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis - // Instead of concat on axis 0, it will concat on axis 1 - // Disable Concat support for 3d tensor for now - // TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d - LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is " - << rank << "d shape"; - return false; - } - - NodeAttrHelper helper(node); - auto axis = static_cast(HandleNegativeAxis(helper.Get("axis", 1), rank)); - if (rank != axis + 3) { - LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis - << ", actual rank: " 
<< rank; - return false; + if (!input_params.create_mlprogram) { + auto rank = input_shape.size(); + if (rank != 4) { + // For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis + // Instead of concat on axis 0, it will concat on axis 1 + // Disable Concat support for 3d tensor for now + // TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d + LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is " + << rank << "d shape"; + return false; + } + + NodeAttrHelper helper(node); + auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank)); + if (rank != axis + 3) { + LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis + << ", actual rank: " << rank; + return false; + } } return true; diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 1d506099b4367..a6df472385d42 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -26,6 +26,13 @@ #include "core/providers/coreml/model/objc_str_utils.h" #include "core/providers/coreml/shape_utils.h" +// manually enable to test logic for handling non-contiguous MLMultiArray as we don't have a unit test setup +// that can hit that. 
+// #define TEST_MLMULTIARRAY_HANDLING +#ifdef TEST_MLMULTIARRAY_HANDLING +#include +#endif + // force the linker to create a dependency on the CoreML framework so that in MAUI usage we don't need // to manually do this asm(".linker_option \"-framework\", \"CoreML\""); @@ -174,51 +181,197 @@ Status CreateInputFeatureProvider(const std::unordered_map= *block_size, "Logic error calculating copy info"); + ORT_ENFORCE(*stride * *num_blocks == total_elems, "Logic error calculating copy info"); + + return Status::OK(); +} + +#ifdef TEST_MLMULTIARRAY_HANDLING +void ValidateGetInfo(MLMultiArray* array, + int64_t expected_num_blocks, int64_t expected_block_size, int64_t expected_stride, bool valid) { + int64_t num_blocks = 0; + int64_t block_size = 0; + int64_t stride = 0; + auto status = GetMLMultiArrayCopyInfo(array, &num_blocks, &block_size, &stride); + + if (!valid) { + assert(!status.IsOK()); + return; + } + + assert(status.IsOK()); + assert(num_blocks == expected_num_blocks); + assert(block_size == expected_block_size); + assert(stride == expected_stride); +} + +void ValidateMLMultiArrayHandling() { + void* data = reinterpret_cast(0xfeedf00d); + + // dim -1 with stride + { + NSArray* shape = @[ @1, @1, @8, @8 ]; + NSArray* strides = @[ @128, @128, @16, @2 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + ValidateGetInfo(array, 64, 1, 2, true); + } + + // dim -2 with stride + { + NSArray* shape = @[ @1, @1, @8, @8 ]; + NSArray* strides = @[ @128, @128, @16, @1 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + ValidateGetInfo(array, 8, 8, 16, true); + } + + // dim -3 with stride + { + NSArray* shape = @[ @1, @2, @4, @4 ]; + NSArray* strides = @[ @48, @24, @4, @1 ]; + + auto* array = 
[[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + + ValidateGetInfo(array, 2, 16, 24, true); + } + + // two non-contiguous dims + { + // dim + NSArray* shape = @[ @1, @2, @4, @4 ]; + NSArray* strides = @[ @96, @48, @8, @1 ]; + + auto* array = [[MLMultiArray alloc] initWithDataPointer:data + shape:shape + dataType:MLMultiArrayDataTypeInt32 + strides:strides + deallocator:^(void* /* bytes */) { + } + error:nil]; + + ValidateGetInfo(array, 0, 0, 0, false); + } } +#endif Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer, - const MLMultiArray* array_info, - const OnnxTensorInfo* tensor_info, - const std::optional mlmultiarray_buffer_size) { + const MLMultiArray* array, + const int64_t num_blocks, const int64_t block_size, const int64_t stride, + const OnnxTensorInfo* tensor_info) { if (mlmultiarray_buffer == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data"); } - const size_t num_elements = array_info.count; + // total including non-contiguous space + + int64_t array_total_elements = [array.strides[0] longLongValue] * [array.shape[0] longLongValue]; + const int64_t num_elements = array.count; + + ORT_RETURN_IF(array_total_elements != num_blocks * stride || + num_elements != num_blocks * block_size, + "MLMultiArray size does not match the copy info"); + const auto onnx_data_type = tensor_info->data_type; switch (onnx_data_type) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - const auto output_data_byte_size = num_elements * sizeof(float); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size, - "CoreML output buffer size and expected output size differ"); - memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size); + const auto* src_buffer = static_cast(mlmultiarray_buffer); + auto* dst_buffer = static_cast(tensor_buffer); 
+ const auto block_byte_size = block_size * sizeof(float); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + memcpy(dst_buffer, src_buffer, block_byte_size); + src_buffer += stride; + dst_buffer += block_size; + } break; } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - const auto output_data_byte_size = num_elements * sizeof(int32_t); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size, - "CoreML output buffer size and expected output size differ"); - memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size); + const auto* src_buffer = static_cast<const int32_t*>(mlmultiarray_buffer); + auto* dst_buffer = static_cast<int32_t*>(tensor_buffer); + const auto block_byte_size = block_size * sizeof(int32_t); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + memcpy(dst_buffer, src_buffer, block_byte_size); + src_buffer += stride; + dst_buffer += block_size; + } + break; } // For this case, since Coreml Spec only uses int32 for model output while onnx provides // int64 for model output data type. 
We are doing a type casting (int32 -> int64) here // when copying the model to ORT case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32, - "CoreML output data type is not MLMultiArrayDataTypeInt32"); - ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t), - "CoreML output buffer size and expected output size differ"); - const auto model_output_span = gsl::span{static_cast(mlmultiarray_buffer), num_elements}; - const auto output_span = gsl::span{static_cast(tensor_buffer), num_elements}; - std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(), - [](int32_t v) { return static_cast(v); }); + ORT_RETURN_IF(array.dataType != MLMultiArrayDataTypeInt32, + "CoreML output data type is not MLMultiArrayDataTypeInt32"); + + const int32_t* src_buffer = static_cast(mlmultiarray_buffer); + int64_t* dst_buffer = static_cast(tensor_buffer); + + for (int64_t idx = 0; idx < num_blocks; ++idx) { + auto input_span = gsl::span{src_buffer, static_cast(block_size)}; + auto output_span = gsl::span{dst_buffer, static_cast(block_size)}; + std::transform(input_span.begin(), input_span.end(), output_span.begin(), + [](int32_t v) { return static_cast(v); }); + + src_buffer += stride; + dst_buffer += block_size; + } break; } default: @@ -250,8 +403,7 @@ - (void)dealloc; - (Status)loadModel API_AVAILABLE_COREML3; - (Status)predict:(const std::unordered_map&)inputs outputs:(const std::unordered_map&)outputs - getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&) - get_output_tensor_mutable_raw_data_fn + getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn API_AVAILABLE_COREML3; @property(nullable) MLModel* model API_AVAILABLE_COREML3; @@ -397,21 +549,27 @@ - (Status)predict:(const std::unordered_map&)inputs ") do not match"); } - ORT_RETURN_IF_NOT(IsArrayContiguous(data), - "Non-contiguous 
output MLMultiArray is not currently supported"); + // support a non-contiguous array, provided only one dimension is not contiguous + int64_t num_blocks = 0; + int64_t block_size = 0; + int64_t stride = 0; + + ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, &num_blocks, &block_size, &stride)); + __block Status copy_status; const auto* tensor_info = &output_tensor_info; // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions if (@available(macOS 12.3, iOS 15.4, *)) { [data getBytesWithHandler:^(const void* bytes, NSInteger size) { - copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size); + copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, + num_blocks, block_size, stride, tensor_info); }]; } else { - // disable size check as old API does not return buffer length - copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt); + copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, + num_blocks, block_size, stride, tensor_info); } - if (!copy_status.IsOK()) - return copy_status; + + ORT_RETURN_IF_ERROR(copy_status); } } } @@ -508,6 +666,11 @@ Status Predict(const std::unordered_map& inputs, Model::~Model() {} Status Model::LoadModel() { + // arbitrary place to run this when manually enabled for temporary testing +#ifdef TEST_MLMULTIARRAY_HANDLING + ValidateMLMultiArrayHandling(); +#endif + return execution_->LoadModel(); } diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md index 3b3790ba06599..e94875f2481da 100644 --- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md +++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md @@ -6,6 +6,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution |ai.onnx:Add|| |ai.onnx:AveragePool|Only 2D Pool is supported currently. 
<br/>3D and 5D support can be added if needed.| |ai.onnx:Clip|| +|ai.onnx:Concat|| |ai.onnx:Conv|Only 1D/2D Conv is supported.
<br/>Bias if provided must be constant.| |ai.onnx:Div|| |ai.onnx:Gemm|Input B must be constant.|