From 629071baed7e1e5357e15cac3d75b33589069c75 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Wed, 28 Aug 2024 14:52:50 -0700
Subject: [PATCH 01/84] Add Lora Parameters schema and script

---
 onnxruntime/lora/lora_format/README.md | 36 ++
 .../lora/lora_format/compile_schema.py | 53 +++
 onnxruntime/lora/lora_format/lora_schema.fbs | 51 +++
 .../lora/lora_format/lora_schema.fbs.h | 338 ++++++++++++++++++
 4 files changed, 478 insertions(+)
 create mode 100644 onnxruntime/lora/lora_format/README.md
 create mode 100644 onnxruntime/lora/lora_format/compile_schema.py
 create mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs
 create mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs.h

diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md
new file mode 100644
index 0000000000000..ec39974464c1c
--- /dev/null
+++ b/onnxruntime/lora/lora_format/README.md
@@ -0,0 +1,36 @@
+# Lora Parameters Flatbuffer Schemas
+This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the
+Lora Parameters file format. This file format is defined as a means to deliver Lora parameters so they can be read by ONNXRuntime C++ code.
+
+The file format is generally designed to house a single Lora adapter's named Lora parameters.
+
+[ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library.
+
+Please do not directly modify the generated C++ header file for the [ONNXRuntime Lora Parameter file format](lora_schema.fbs.h).
+
+Use the flatc compiler for this purpose.
+
+e.g.
+ - Windows Debug build
+ - \build\Windows\Debug\_deps\flatbuffers-build\Debug\flatc.exe
+ - Linux Debug build
+ - /build/Linux/Debug/_deps/flatbuffers-build/flatc
+
+It is possible to use another flatc as well, e.g., from a separate installation.
+
+To update the flatbuffers schemas and generated files:
+1.
Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). +2. Run [compile_schema.py](./compile_schema.py) to generate the C++ and Python bindings. + + ``` + python onnxruntime/lora/lora_format/compile_schema.py --flatc + ``` +# Lora format version history +In [lora_format_version.h](../lora_format_version.h), see `IsLoraParameterslVersionSupported()` for the supported versions and +`kLoraParametersVersion` for the current version. + +## Version 1 +History begins. + +Initial support for FlatBuffers that Lora Parameters support. This includes a definition of Tensor entity +so it can be saved in a tensor per file format. diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py new file mode 100644 index 0000000000000..bee53885a2005 --- /dev/null +++ b/onnxruntime/lora/lora_format/compile_schema.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import argparse +import pathlib +import subprocess + +SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + +def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): + # run flatc to generate C++ code + cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] + subprocess.run(cmd, check=True, cwd=SCRIPT_DIR) + + +def main(): + parser = argparse.ArgumentParser( + description="Generate language bindings for the ORT flatbuffers schema.", + usage="Provide the path to the flatbuffers flatc executable. " + "Script can be executed from anywhere but must be located in its original " + "directory in the ONNX Runtime enlistment.", + ) + + parser.add_argument( + "-f", + "--flatc", + required=True, + type=pathlib.Path, + help="Path to flatbuffers flatc executable. 
" + "Can be found in the build directory under _deps/flatbuffers-build//", + ) + + all_languages = ["cpp"] + parser.add_argument( + "-l", + "--language", + action="append", + dest="languages", + choices=all_languages, + help="Specify which language bindings to generate.", + ) + + args = parser.parse_args() + languages = args.languages if args.languages is not None else all_languages + flatc = args.flatc.resolve(strict=True) + schema_path = SCRIPT_DIR / "lora_schema.fbs" + + if "cpp" in languages: + generate_cpp(flatc, schema_path) + +if __name__ == "__main__": + main() diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs new file mode 100644 index 0000000000000..9079211ae80a6 --- /dev/null +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace onnxruntime.lora_format; + +// Tensor +enum TensorDataType : int32 { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, +} + +// For simplicity, we will have only have one data field +// - raw_data for all primitive types. +// We do not foresee strings as parameters. 
+table Parameter { + name:string; + + dims:[int64]; + data_type:TensorDataType; + + raw_data:[uint8]; +} + +table Adapter { + format_version:int; + adapter_version:int; + model_version:int; + parameters:[Parameter]; +} + +root_type Adapter; +file_identifier "GAIL"; diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h new file mode 100644 index 0000000000000..a70bb36a0aa68 --- /dev/null +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -0,0 +1,338 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ +#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ + +#include "flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && + FLATBUFFERS_VERSION_MINOR == 5 && + FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); + +namespace onnxruntime { +namespace lora_format { + +struct Parameter; +struct ParameterBuilder; + +struct Adapter; +struct AdapterBuilder; + +enum class TensorDataType : int32_t { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, + MIN = UNDEFINED, + MAX = FLOAT8E5M2FNUZ +}; + +inline const TensorDataType (&EnumValuesTensorDataType())[21] { + static const TensorDataType values[] = { + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + 
TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ + }; + return values; +} + +inline const char * const *EnumNamesTensorDataType() { + static const char * const names[22] = { + "UNDEFINED", + "FLOAT", + "UINT8", + "INT8", + "UINT16", + "INT16", + "INT32", + "INT64", + "STRING", + "BOOL", + "FLOAT16", + "DOUBLE", + "UINT32", + "UINT64", + "COMPLEX64", + "COMPLEX128", + "BFLOAT16", + "FLOAT8E4M3FN", + "FLOAT8E4M3FNUZ", + "FLOAT8E5M2", + "FLOAT8E5M2FNUZ", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorDataType(TensorDataType e) { + if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorDataType()[index]; +} + +struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ParameterBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_DIMS = 6, + VT_DATA_TYPE = 8, + VT_RAW_DATA = 10 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + const ::flatbuffers::Vector *dims() const { + return GetPointer *>(VT_DIMS); + } + onnxruntime::lora_format::TensorDataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); + } + const ::flatbuffers::Vector *raw_data() const { + return GetPointer *>(VT_RAW_DATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_DIMS) && + verifier.VerifyVector(dims()) && + VerifyField(verifier, VT_DATA_TYPE, 4) && + VerifyOffset(verifier, VT_RAW_DATA) && + 
verifier.VerifyVector(raw_data()) && + verifier.EndTable(); + } +}; + +struct ParameterBuilder { + typedef Parameter Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Parameter::VT_NAME, name); + } + void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { + fbb_.AddOffset(Parameter::VT_DIMS, dims); + } + void add_data_type(onnxruntime::lora_format::TensorDataType data_type) { + fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); + } + void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { + fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); + } + explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateParameter( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, + onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { + ParameterBuilder builder_(_fbb); + builder_.add_raw_data(raw_data); + builder_.add_data_type(data_type); + builder_.add_dims(dims); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateParameterDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + const std::vector *dims = nullptr, + onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + const std::vector *raw_data = nullptr) { + auto name__ = name ? _fbb.CreateString(name) : 0; + auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; + auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; + return onnxruntime::lora_format::CreateParameter( + _fbb, + name__, + dims__, + data_type, + raw_data__); +} + +struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AdapterBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FORMAT_VERSION = 4, + VT_ADAPTER_VERSION = 6, + VT_MODEL_VERSION = 8, + VT_PARAMETERS = 10 + }; + int32_t format_version() const { + return GetField(VT_FORMAT_VERSION, 0); + } + int32_t adapter_version() const { + return GetField(VT_ADAPTER_VERSION, 0); + } + int32_t model_version() const { + return GetField(VT_MODEL_VERSION, 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { + return GetPointer> *>(VT_PARAMETERS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FORMAT_VERSION, 4) && + VerifyField(verifier, VT_ADAPTER_VERSION, 4) && + VerifyField(verifier, VT_MODEL_VERSION, 4) && + VerifyOffset(verifier, VT_PARAMETERS) && + verifier.VerifyVector(parameters()) && + verifier.VerifyVectorOfTables(parameters()) && + verifier.EndTable(); + } +}; + +struct AdapterBuilder { + typedef Adapter Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_format_version(int32_t format_version) { + fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); + } + void add_adapter_version(int32_t adapter_version) { + fbb_.AddElement(Adapter::VT_ADAPTER_VERSION, adapter_version, 0); + } + void add_model_version(int32_t model_version) { + fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); + } + void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { + fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); + } + explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + 
const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAdapter( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t format_version = 0, + int32_t adapter_version = 0, + int32_t model_version = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { + AdapterBuilder builder_(_fbb); + builder_.add_parameters(parameters); + builder_.add_model_version(model_version); + builder_.add_adapter_version(adapter_version); + builder_.add_format_version(format_version); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateAdapterDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t format_version = 0, + int32_t adapter_version = 0, + int32_t model_version = 0, + const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; + return onnxruntime::lora_format::CreateAdapter( + _fbb, + format_version, + adapter_version, + model_version, + parameters__); +} + +inline const onnxruntime::lora_format::Adapter *GetAdapter(const void *buf) { + return ::flatbuffers::GetRoot(buf); +} + +inline const onnxruntime::lora_format::Adapter *GetSizePrefixedAdapter(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline const char *AdapterIdentifier() { + return "GAIL"; +} + +inline bool AdapterBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, AdapterIdentifier()); +} + +inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, AdapterIdentifier(), true); +} + +inline bool VerifyAdapterBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(AdapterIdentifier()); +} + +inline bool VerifySizePrefixedAdapterBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); 
+} + +inline void FinishAdapterBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.Finish(root, AdapterIdentifier()); +} + +inline void FinishSizePrefixedAdapterBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, AdapterIdentifier()); +} + +} // namespace lora_format +} // namespace onnxruntime + +#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ From 7837eea1246f004add283ca1b979fb7bef1886c9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 29 Aug 2024 17:22:36 -0700 Subject: [PATCH 02/84] Add onnxruntime_lora static lib --- cmake/CMakeLists.txt | 2 +- cmake/onnxruntime_lora.cmake | 30 ++++ cmake/onnxruntime_python.cmake | 1 + cmake/onnxruntime_unittests.cmake | 12 +- cmake/onnxruntime_webassembly.cmake | 2 + cmake/winml_unittests.cmake | 2 +- onnxruntime/lora/lora_adapters.cc | 35 +++++ onnxruntime/lora/lora_adapters.h | 74 +++++++++ onnxruntime/lora/lora_format/lora_schema.fbs | 2 +- .../lora/lora_format/lora_schema.fbs.h | 144 +++++++++--------- onnxruntime/lora/lora_format_utils.cc | 111 ++++++++++++++ onnxruntime/lora/lora_format_utils.h | 87 +++++++++++ onnxruntime/lora/lora_format_version.h | 33 ++++ 13 files changed, 455 insertions(+), 80 deletions(-) create mode 100644 cmake/onnxruntime_lora.cmake create mode 100644 onnxruntime/lora/lora_adapters.cc create mode 100644 onnxruntime/lora/lora_adapters.h create mode 100644 onnxruntime/lora/lora_format_utils.cc create mode 100644 onnxruntime/lora/lora_format_utils.h create mode 100644 onnxruntime/lora/lora_format_version.h diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2e9a50e522171..0ef18f56b1917 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1690,7 +1690,7 @@ endif() #Now the 'onnxruntime_EXTERNAL_LIBRARIES' variable should be sealed. It will be used in onnxruntime.cmake which will be included in the next. #The order of the following targets matters. 
Right depends on left. If target A appears before target B. Then A.cmake can not use variables defined in B.cmake. -set(ONNXRUNTIME_CMAKE_FILES onnxruntime_flatbuffers onnxruntime_common onnxruntime_mlas onnxruntime_graph onnxruntime_framework onnxruntime_util onnxruntime_providers onnxruntime_optimizer onnxruntime_session ${ONNXRUNTIME_EAGER_CMAKE_FILE_NAME}) +set(ONNXRUNTIME_CMAKE_FILES onnxruntime_flatbuffers onnxruntime_common onnxruntime_mlas onnxruntime_graph onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_providers onnxruntime_optimizer onnxruntime_session ${ONNXRUNTIME_EAGER_CMAKE_FILE_NAME}) if (onnxruntime_USE_WINML) # WINML uses and depends on the shared lib. Note: You can build WINML without DML and you will get a diff --git a/cmake/onnxruntime_lora.cmake b/cmake/onnxruntime_lora.cmake new file mode 100644 index 0000000000000..3f99e230031ad --- /dev/null +++ b/cmake/onnxruntime_lora.cmake @@ -0,0 +1,30 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +file(GLOB onnxruntime_lora_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/lora_format/*.h" + "${ONNXRUNTIME_ROOT}/lora/*.h" + "${ONNXRUNTIME_ROOT}/lora/*.cc" + ) + +source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_lora_srcs}) + +onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs}) +onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers ${GSL_TARGET}) +target_link_libraries(onnxruntime_lora onnxruntime_framework) + +if(onnxruntime_ENABLE_INSTRUMENT) + target_compile_definitions(onnxruntime_lora PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) +endif() + +target_include_directories(onnxruntime_lora PRIVATE ${ONNXRUNTIME_ROOT}) +add_dependencies(onnxruntime_lora ${onnxruntime_EXTERNAL_DEPENDENCIES}) +set_target_properties(onnxruntime_lora PROPERTIES FOLDER "ONNXRuntime") + +if (NOT onnxruntime_BUILD_SHARED_LIB) + install(TARGETS onnxruntime_lora + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index b2dbe4b3da5e8..b8d8a6f58b118 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -184,6 +184,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE onnxruntime_providers onnxruntime_util ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index d7f4a0675e118..87a698af616d0 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -612,6 +612,7 @@ set(ONNXRUNTIME_TEST_LIBS onnxruntime_providers onnxruntime_util ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph @@ -1223,7 +1224,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) #onnxruntime_common is kind of ok 
because it is thin, tiny and totally stateless. set(onnxruntime_perf_test_libs onnx_test_runner_common onnxruntime_test_utils onnxruntime_common - onnxruntime onnxruntime_flatbuffers onnx_test_data_proto + onnxruntime onnxruntime_lora onnxruntime_flatbuffers onnx_test_data_proto ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) if(NOT WIN32) @@ -1236,7 +1237,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) list(APPEND onnxruntime_perf_test_libs ${android_shared_libs}) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) + list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) endif() target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads) if(WIN32) @@ -1289,7 +1290,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2) + list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_lora onnxruntime_flatbuffers iconv re2) endif() AddTest(DYN @@ -1472,6 +1473,7 @@ endif() 
onnxruntime_optimizer onnxruntime_providers onnxruntime_util + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph @@ -1592,7 +1594,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") list(APPEND onnxruntime_customopregistration_test_LIBS ${TENSORRT_LIBRARY_INFER}) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_customopregistration_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) + list(APPEND onnxruntime_customopregistration_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) endif() AddTest(DYN TARGET onnxruntime_customopregistration_test @@ -1711,7 +1713,7 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" set(onnxruntime_logging_apis_test_LIBS onnxruntime_common onnxruntime_test_utils) if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_logging_apis_test_LIBS onnxruntime_session onnxruntime_util onnxruntime_framework onnxruntime_common onnxruntime_graph onnxruntime_providers onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) + list(APPEND onnxruntime_logging_apis_test_LIBS onnxruntime_session onnxruntime_util onnxruntime_lora onnxruntime_framework onnxruntime_common onnxruntime_graph onnxruntime_providers onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) endif() if(NOT WIN32) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 0686b66876d9f..3a1576065205f 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ 
-102,6 +102,7 @@ if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB) onnx onnx_proto onnxruntime_common + onnxruntime_lora onnxruntime_flatbuffers onnxruntime_framework onnxruntime_graph @@ -179,6 +180,7 @@ else() onnx onnx_proto onnxruntime_common + onnxruntime_lora onnxruntime_flatbuffers onnxruntime_framework onnxruntime_graph diff --git a/cmake/winml_unittests.cmake b/cmake/winml_unittests.cmake index b655e60a8aec9..68acac584f2c0 100644 --- a/cmake/winml_unittests.cmake +++ b/cmake/winml_unittests.cmake @@ -166,7 +166,7 @@ function (get_winml_test_model_src "${winml_test_src_path}/model/*.cpp") set(${output_winml_test_model_src} ${winml_test_model_src} PARENT_SCOPE) set(${winml_test_model_libs} onnx_test_data_proto onnx_test_runner_common onnxruntime_common onnxruntime_mlas - onnxruntime_graph onnxruntime_test_utils onnxruntime_framework onnxruntime_util onnxruntime_flatbuffers PARENT_SCOPE) + onnxruntime_graph onnxruntime_test_utils onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_flatbuffers PARENT_SCOPE) endfunction() file(GLOB winml_test_common_src CONFIGURE_DEPENDS diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc new file mode 100644 index 0000000000000..1425690b41660 --- /dev/null +++ b/onnxruntime/lora/lora_adapters.cc @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "lora_adapters.h" +#include "lora_format_utils.h" + +#include +#include + +namespace onnxruntime { +namespace lora { +namespace details { + +LoraParam::LoraParam(std::string name, OrtValue ort_value) + : name_(std::move(name)), ort_value_(std::move(ort_value)) {} + +void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { + auto buffer = utils::LoadLoraAdapterBytes(file_path); + adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); + buffer_.emplace(std::move(buffer)); +} + +size_t BinaryFormatHolder::GetSize() const { + if (std::holds_alternative(buffer_)) { + return std::get<0>(buffer_).file_size_; + } else if (std::holds_alternative(buffer_)) { + return std::get<1>(buffer_).buffer_.size(); + } + ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); +} + +} // namespace details + +} // namespace lora +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h new file mode 100644 index 0000000000000..5966e14248fb0 --- /dev/null +++ b/onnxruntime/lora/lora_adapters.h @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/platform/env.h" +#include "core/framework/ort_value.h" + +#include +#include +#include +#include + +namespace onnxruntime { +namespace lora { + +struct Adapter; + +namespace details { +// This class takes hold of the serialized parameters that +// are either loaded from disk or mapped from disk (coming in the future) +// This data is always in host memory. +class BinaryFormatHolder { + public: + BinaryFormatHolder() = default; + BinaryFormatHolder(const BinaryFormatHolder&) = delete; + BinaryFormatHolder& operator=(const BinaryFormatHolder&) = delete; + + /// + /// Load parameters from a flatbuffer file. 
+ /// + /// file name that can be opened + void Load(const std::filesystem::path& file_path); + + void MemoryMapFile(const std::string& file_name); + + // Get Flatbuffer object pointer + const Adapter* GetParameters() const noexcept { return adapter_; } + + // Get the size of the buffer + size_t GetSize() const; + + private: + struct BufferHolder { + explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} + std::vector buffer_; + }; + + struct MemMapHolder { + MemMapHolder(Env::MappedMemoryPtr mapped_memory, size_t file_size) + : mapped_memory_(std::move(mapped_memory)), file_size_(file_size) {} + Env::MappedMemoryPtr mapped_memory_; + size_t file_size_; + }; + + std::variant buffer_; + const Adapter* adapter_; +}; + +/// +/// Represents a named lora parameter (tensor) +/// +struct LoraParam { + LoraParam() = default; + LoraParam(std::string name, OrtValue parameter); + + std::string name_; + OrtValue ort_value_; +}; + +} // namespace details + +} // namespace lora +} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs index 9079211ae80a6..073fe0945517d 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-namespace onnxruntime.lora_format; +namespace onnxruntime.lora; // Tensor enum TensorDataType : int32 { diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h index a70bb36a0aa68..72b27dd355814 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -1,8 +1,8 @@ // automatically generated by the FlatBuffers compiler, do not modify -#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ -#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ +#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ +#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ #include "flatbuffers/flatbuffers.h" @@ -14,7 +14,7 @@ static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && "Non-compatible flatbuffers version included"); namespace onnxruntime { -namespace lora_format { +namespace lora { struct Parameter; struct ParameterBuilder; @@ -22,55 +22,55 @@ struct ParameterBuilder; struct Adapter; struct AdapterBuilder; -enum class TensorDataType : int32_t { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, - MIN = UNDEFINED, - MAX = FLOAT8E5M2FNUZ +enum TensorDataType : int32_t { + TensorDataType_UNDEFINED = 0, + TensorDataType_FLOAT = 1, + TensorDataType_UINT8 = 2, + TensorDataType_INT8 = 3, + TensorDataType_UINT16 = 4, + TensorDataType_INT16 = 5, + TensorDataType_INT32 = 6, + TensorDataType_INT64 = 7, + TensorDataType_STRING = 8, + TensorDataType_BOOL = 9, + TensorDataType_FLOAT16 = 10, + TensorDataType_DOUBLE = 11, + TensorDataType_UINT32 = 12, + TensorDataType_UINT64 = 13, + TensorDataType_COMPLEX64 = 14, + TensorDataType_COMPLEX128 = 15, + 
TensorDataType_BFLOAT16 = 16, + TensorDataType_FLOAT8E4M3FN = 17, + TensorDataType_FLOAT8E4M3FNUZ = 18, + TensorDataType_FLOAT8E5M2 = 19, + TensorDataType_FLOAT8E5M2FNUZ = 20, + TensorDataType_MIN = TensorDataType_UNDEFINED, + TensorDataType_MAX = TensorDataType_FLOAT8E5M2FNUZ }; inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ + TensorDataType_UNDEFINED, + TensorDataType_FLOAT, + TensorDataType_UINT8, + TensorDataType_INT8, + TensorDataType_UINT16, + TensorDataType_INT16, + TensorDataType_INT32, + TensorDataType_INT64, + TensorDataType_STRING, + TensorDataType_BOOL, + TensorDataType_FLOAT16, + TensorDataType_DOUBLE, + TensorDataType_UINT32, + TensorDataType_UINT64, + TensorDataType_COMPLEX64, + TensorDataType_COMPLEX128, + TensorDataType_BFLOAT16, + TensorDataType_FLOAT8E4M3FN, + TensorDataType_FLOAT8E4M3FNUZ, + TensorDataType_FLOAT8E5M2, + TensorDataType_FLOAT8E5M2FNUZ }; return values; } @@ -104,7 +104,7 @@ inline const char * const *EnumNamesTensorDataType() { } inline const char *EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; + if (::flatbuffers::IsOutRange(e, TensorDataType_UNDEFINED, TensorDataType_FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; } @@ -123,8 +123,8 @@ struct 
Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { const ::flatbuffers::Vector *dims() const { return GetPointer *>(VT_DIMS); } - onnxruntime::lora_format::TensorDataType data_type() const { - return static_cast(GetField(VT_DATA_TYPE, 0)); + onnxruntime::lora::TensorDataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); } const ::flatbuffers::Vector *raw_data() const { return GetPointer *>(VT_RAW_DATA); @@ -152,7 +152,7 @@ struct ParameterBuilder { void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { fbb_.AddOffset(Parameter::VT_DIMS, dims); } - void add_data_type(onnxruntime::lora_format::TensorDataType data_type) { + void add_data_type(onnxruntime::lora::TensorDataType data_type) { fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); } void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { @@ -173,7 +173,7 @@ inline ::flatbuffers::Offset CreateParameter( ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { ParameterBuilder builder_(_fbb); builder_.add_raw_data(raw_data); @@ -187,12 +187,12 @@ inline ::flatbuffers::Offset CreateParameterDirect( ::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, const std::vector *dims = nullptr, - onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, const std::vector *raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; - return onnxruntime::lora_format::CreateParameter( + return onnxruntime::lora::CreateParameter( _fbb, name__, dims__, @@ -217,8 +217,8 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { int32_t model_version() const { return GetField(VT_MODEL_VERSION, 0); } - const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { - return GetPointer> *>(VT_PARAMETERS); + const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { + return GetPointer> *>(VT_PARAMETERS); } bool Verify(::flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -245,7 +245,7 @@ struct AdapterBuilder { void add_model_version(int32_t model_version) { fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); } - void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { + void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); } explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) @@ -264,7 +264,7 @@ inline ::flatbuffers::Offset CreateAdapter( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { AdapterBuilder builder_(_fbb); builder_.add_parameters(parameters); builder_.add_model_version(model_version); @@ -278,9 +278,9 @@ inline ::flatbuffers::Offset CreateAdapterDirect( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - const std::vector<::flatbuffers::Offset> *parameters = nullptr) { - auto parameters__ = parameters ? 
_fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; - return onnxruntime::lora_format::CreateAdapter( + const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; + return onnxruntime::lora::CreateAdapter( _fbb, format_version, adapter_version, @@ -288,12 +288,12 @@ inline ::flatbuffers::Offset CreateAdapterDirect( parameters__); } -inline const onnxruntime::lora_format::Adapter *GetAdapter(const void *buf) { - return ::flatbuffers::GetRoot(buf); +inline const onnxruntime::lora::Adapter *GetAdapter(const void *buf) { + return ::flatbuffers::GetRoot(buf); } -inline const onnxruntime::lora_format::Adapter *GetSizePrefixedAdapter(const void *buf) { - return ::flatbuffers::GetSizePrefixedRoot(buf); +inline const onnxruntime::lora::Adapter *GetSizePrefixedAdapter(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); } inline const char *AdapterIdentifier() { @@ -312,27 +312,27 @@ inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { inline bool VerifyAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(AdapterIdentifier()); + return verifier.VerifyBuffer(AdapterIdentifier()); } inline bool VerifySizePrefixedAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); + return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); } inline void FinishAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.Finish(root, AdapterIdentifier()); } inline void FinishSizePrefixedAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.FinishSizePrefixed(root, AdapterIdentifier()); } -} // namespace lora_format +} // namespace lora } // namespace onnxruntime -#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ 
+#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc new file mode 100644 index 0000000000000..1e2ce1d58d0fe --- /dev/null +++ b/onnxruntime/lora/lora_format_utils.cc @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "lora_format_utils.h" +#include "lora_format_version.h" + +#include "core/common/common.h" +#include "core/common/span_utils.h" + +#include + +namespace onnxruntime { +namespace lora { +namespace utils { + +bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) { + return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory + AdapterBufferHasIdentifier(bytes); +} + +flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, + bool has_string, const std::string& src) { + if (has_string) return builder.CreateString(src); + + // If the string does not exist, return 0 (the string does not exist in flatbuffer) + return 0; +} + +void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) { + if (fbs_string) { + dst = fbs_string->str(); + } +} + +std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path) { + Env& env = Env::Default(); + + size_t file_size = 0; + ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); + + std::vector result; + result.resize(file_size); + + // The API accepts char span, so we need to reinterpret the uint8_t span as char span + auto dest_span = ReinterpretAsSpan(AsSpan(result)); + ORT_THROW_IF_ERROR(env.ReadFileIntoBuffer(file_path.c_str(), 0, file_size, dest_span)); + + return result; +} + +std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path) { + Env& env = Env::Default(); + + size_t file_size = 0; + ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); + + Env::MappedMemoryPtr 
result; + ORT_THROW_IF_ERROR(env.MapFileIntoMemory(file_path.c_str(), 0, file_size, result)); + + return {std::move(result), file_size}; +} + +const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { + if (!IsLoraFormatModelBytes(bytes.data(), bytes.size())) { + ORT_THROW("The buffer does not appear to be a valid lora parameter format"); + } + + flatbuffers::Verifier verifier(bytes.data(), bytes.size()); + if (!VerifyAdapterBuffer(verifier)) { + ORT_THROW("The buffer fails lora adapter format verification"); + } + + auto* adapter = GetAdapter(bytes.data()); + if (!IsLoraFormatVersionSupported(adapter->format_version())) { + ORT_THROW("Unsupported lora format version"); + } + + return adapter; +} + +void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, + TensorDataType data_type, gsl::span shape, + gsl::span data, + flatbuffers::Offset& fbs_tensor) { + auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size()); + auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size()); + auto data_vec = flat_builder.CreateVector(data.data(), data.size()); + + fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); +} + +// std::pair CreateOrtValueOverFlatBufferLoraParameter( +// const Parameter& tensor) { +// std::string name; +// LoadStringFromLoraFormat(name, tensor.name()); + +// const auto data_type = tensor.data_type(); + +// gsl::span shape_span(tensor.dims()->data(), tensor.dims()->size()); + +// auto mem_info = OrtMemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); +// auto ort_value = +// OrtValue::CreateTensor(*mem_info, const_cast(tensor.raw_data()->data()), +// static_cast(tensor.raw_data()->size()), shape_span, +// static_cast(data_type)); +// return std::make_pair(std::move(name), std::move(ort_value)); +// } + +} // namespace utils +} // namespace lora +} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h 
b/onnxruntime/lora/lora_format_utils.h new file mode 100644 index 0000000000000..508eb38ffb27c --- /dev/null +++ b/onnxruntime/lora/lora_format_utils.h @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/common/flatbuffers.h" +#include "core/platform/env.h" + +#include +#include + +#include "lora_format/lora_schema.fbs.h" + +#include +#include +#include +#include + +namespace onnxruntime { +namespace lora { +namespace utils { + +/// +/// +/// +/// +/// +/// +bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes); + +// Will only create string in flatbuffers when has_string is true +flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, + bool has_string, const std::string& src); + +void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string); + +/// +/// The function loads the lora adapter bytes from the file system +/// +/// file path +/// bytes in a vector +/// If the path can not be found +std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path); + +/// +/// This function memory maps the adapter file in memory +/// +/// +/// +std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); + +/// +/// Validates underlying format and the format version +/// +/// +/// Adapter ptr +const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes); + +/// +/// Serializes tensor data into flatbuffer +/// +/// +/// parameter name +/// doc, optional +/// +/// +/// +/// output offset +void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, + lora::TensorDataType data_type, + gsl::span shape, gsl::span data, + flatbuffers::Offset& fbs_tensor); + +/// +/// Create an OrtValue on top of the flatbuffer tensor +/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer +/// structures. 
+/// +/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying. +/// +/// +/// +// std::pair CreateOrtValueOverFlatBufferLoraParameter( +// const Generators::lora_parameters::Param& tensor); +} // namespace utils +} // namespace lora +} // namespace Generators diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h new file mode 100644 index 0000000000000..9c90a86b16382 --- /dev/null +++ b/onnxruntime/lora/lora_format_version.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include + +namespace onnxruntime { +namespace lora { + +// The current model versions for saving lora parameters in flatbuffers +// Once this version is updated, the kSupportedLoraFormatVersions in IsLoraFormatVersionSupported +// below will also need to be updated. +// See src/flatbuffers/schema/README.md for more details on versioning. +// Version 1 - history begins +constexpr const int kLoraFormatVersion = 1; + +// Check if the given lora format version is supported in this build +inline bool IsLoraFormatVersionSupported(const int lora_format_version) { + // The lora format versions we will support in this build + // This may contain more versions than the kLoraFormatVersion, based on the compatibilities + static constexpr std::array kSupportedLoraFormatVersions{ + kLoraFormatVersion, + }; + + const auto it = + std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version); + return it != kSupportedLoraFormatVersions.cend(); +} + +} // namespace lora +} // namespace onnxruntime From 00fb33713ae219f20b1df99910be5d94ab631976 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 30 Aug 2024 15:14:29 -0700 Subject: [PATCH 03/84] Define and expose C API stubs --- cmake/onnxruntime_session.cmake | 2 +- .../onnxruntime/core/framework/run_options.h | 6 ++++
.../core/session/onnxruntime_c_api.h | 32 +++++++++++++++++++ onnxruntime/core/framework/config_options.h | 2 +- onnxruntime/core/framework/run_options.cc | 19 +++++++++++ onnxruntime/core/session/onnxruntime_c_api.cc | 9 ++++-- onnxruntime/core/session/ort_apis.h | 7 ++++ onnxruntime/lora/lora_adapters.cc | 9 ++++++ onnxruntime/lora/lora_adapters.h | 12 +++++-- onnxruntime/lora/lora_format_utils.h | 2 +- 10 files changed, 93 insertions(+), 7 deletions(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index b51c875951135..2325ff82dedc5 100644 --- a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -30,7 +30,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) -onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) +onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_lora onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index 789c3b13f2c3e..ed506ed815dec 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -8,6 +8,10 @@ #include "core/session/onnxruntime_c_api.h" #include "core/framework/config_options.h" +namespace onnxruntime { +struct OrtLoraAdapter; +} // namespace onnxruntime + /** * Configuration information for a Run call. 
*/ @@ -40,6 +44,8 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; + std::vector active_adapters_; + OrtRunOptions() = default; ~OrtRunOptions() = default; }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 4674db42fb1c9..cffe1d8e77af5 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -304,6 +304,7 @@ ORT_RUNTIME_CLASS(Op); ORT_RUNTIME_CLASS(OpAttr); ORT_RUNTIME_CLASS(Logger); ORT_RUNTIME_CLASS(ShapeInferContext); +ORT_RUNTIME_CLASS(LoraAdapter); #ifdef _WIN32 typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr; @@ -4667,6 +4668,37 @@ struct OrtApi { _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array, _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths, size_t num_external_initializer_files); + + /** \brief Create an OrtLoraAdapter + * + * The function attempts to locate file specified by adapter_file_path, read it and create an OrtLoraAdapter + * instance. The adapter_file_path should be a valid absolute path to a file that contains a valid Lora Adapter + * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless + * the platform does not support memory mapping, in which case the file will be read into memory. + * + * \param[in] adapter_file_path Absolute file path to the adapter file. + * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with + * OrtApi::ReleaseLoraAdapter. 
+ */ + ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); + + /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter + */ + ORT_CLASS_RELEASE(LoraAdapter); + + + /** \brief Set the active Lora Adapter for the run options + * + * The function sets the active Lora Adapter for the run options. The Lora Adapter must be created with + * OrtApi::CreateLoraAdapter. The Lora Adapter will be used by the session to run the model. + * The instance of the OrtRunOptions can then be used to customize the OrtSession::Run() calls. + * More than one OrtLoraAdapter can be set active at the same time. Lora Parameters that belong to different + * Lora adapters that will be active at the same time must not overlap. + * + * \param[in] options OrtRunOptions instance + * \param[in] adapter OrtLoraAdapter instance + */ + ORT_API2_STATUS(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); }; /* diff --git a/onnxruntime/core/framework/config_options.h b/onnxruntime/core/framework/config_options.h index 7b7c226819e79..efdfdb45abbaa 100644 --- a/onnxruntime/core/framework/config_options.h +++ b/onnxruntime/core/framework/config_options.h @@ -19,7 +19,7 @@ struct ConfigOptions { // Gets the config string associated with the given config_key. // If not found, an empty optional is returned. - optional GetConfigEntry(const std::string& config_key) const noexcept; + std::optional GetConfigEntry(const std::string& config_key) const noexcept; // Check if this instance of ConfigOptions has a config using the given config_key. // Returns true if found and copies the value into config_value.
diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 95c111009c791..65f36f07c4847 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -2,12 +2,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "core/framework/run_options.h" +#include "lora/lora_adapters.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" + #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(disable : 26409) #endif + ORT_API_STATUS_IMPL(OrtApis::CreateRunOptions, _Outptr_ OrtRunOptions** out) { API_IMPL_BEGIN *out = new OrtRunOptions(); @@ -60,3 +63,19 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, _In_z_ const char* config_key, _In_z_ const char* config_value) { return onnxruntime::ToOrtStatus(options->config_options.AddConfigEntry(config_key, config_value)); } + +ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions*, const _In_ OrtLoraAdapter*) { + // Need cast to the real type + // options->active_adapters_.push_back(adapter); + return nullptr; +} + +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T*, _Outptr_ OrtLoraAdapter**) { + //auto adapter = new onnxruntime::OrtLoraAdapter(adapter_file_path); + // *out = adapter.release(); + return nullptr; +} + +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*) { + // delete reinterpret_cast(adapter); +} diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 1a5484ddc0055..96f2ee1e14ee1 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2730,6 +2730,9 @@ static constexpr OrtApi ort_api_1_to_20 = { &OrtApis::KernelInfoGetAllocator, &OrtApis::AddExternalInitializersFromFilesInMemory, // 
End of Version 18 - DO NOT MODIFY ABOVE (see above text for more information) + &OrtApis::CreateLoraAdapter, + &OrtApis::ReleaseLoraAdapter, + &OrtApis::RunOptionsSetActiveLoraAdapter, }; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. @@ -2786,7 +2789,7 @@ ORT_API(const char*, OrtApis::GetVersionString) { return ORT_VERSION; } -ORT_API(const char*, OrtApis::GetBuildInfoString) { +const char* _stdcall OrtApis::GetBuildInfoString() noexcept { return ORT_BUILD_INFO; } @@ -2799,6 +2802,8 @@ ORT_API(void, OrtApis::ReleaseEnv, OrtEnv* value) { } DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Value, OrtValue) -DEFINE_RELEASE_ORT_OBJECT_FUNCTION(RunOptions, OrtRunOptions) +void _stdcall OrtApis::ReleaseRunOptions(OrtRunOptions* value) noexcept { + delete reinterpret_cast(value); +} DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(ModelMetadata, ::onnxruntime::ModelMetadata) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index fcae173e6c162..8564985beb9f7 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -118,6 +118,8 @@ ORT_API_STATUS_IMPL(RunOptionsGetRunTag, _In_ const OrtRunOptions*, _Out_ const ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); +ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); + ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type, @@ -523,4 +525,9 @@ ORT_API_STATUS_IMPL(SessionOptionsAppendExecutionProvider_VitisAI, _In_ OrtSessi ORT_API_STATUS_IMPL(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* context, _In_ const OrtMemoryInfo* mem_info, _In_ size_t count_or_bytes, 
_Outptr_ void** out); ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); + +ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); +ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); +ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); + } // namespace OrtApis diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 1425690b41660..e10a577076547 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,12 +14,21 @@ namespace details { LoraParam::LoraParam(std::string name, OrtValue ort_value) : name_(std::move(name)), ort_value_(std::move(ort_value)) {} +BinaryFormatHolder::~BinaryFormatHolder() = default; + void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); } +void BinaryFormatHolder::MemoryMap(const std::filesystem::path& file_path) { + auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); + auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); + adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); + buffer_.emplace(std::move(mapped_memory), file_size); +} + size_t BinaryFormatHolder::GetSize() const { if (std::holds_alternative(buffer_)) { return std::get<0>(buffer_).file_size_; diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 5966e14248fb0..541ae59fbe293 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,14 +25,22 @@ class BinaryFormatHolder { BinaryFormatHolder() = default; BinaryFormatHolder(const BinaryFormatHolder&) = delete; BinaryFormatHolder& operator=(const 
BinaryFormatHolder&) = delete; + ~BinaryFormatHolder(); + + BinaryFormatHolder(BinaryFormatHolder&&) = default; + BinaryFormatHolder& operator=(BinaryFormatHolder&&) = default; /// - /// Load parameters from a flatbuffer file. + /// Load parameters from an adapter file and validates its format. /// /// file name that can be opened void Load(const std::filesystem::path& file_path); - void MemoryMapFile(const std::string& file_name); + /// + /// Memory maps adapter file into memory and validates its format. + /// + /// + void MemoryMap(const std::filesystem::path& file_path); // Get Flatbuffer object pointer const Adapter* GetParameters() const noexcept { return adapter_; } diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 508eb38ffb27c..75b499eb0bef9 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -46,7 +46,7 @@ std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path /// This function memory maps the adapter file in memory /// /// -/// +/// memory handle and file size in a tuple std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); /// From 8d070c9ad89e2249e6887b8221b0015f8ce27d46 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 30 Aug 2024 16:45:05 -0700 Subject: [PATCH 04/84] Add loading --- .../core/session/onnxruntime_cxx_api.h | 12 ++++++ .../core/session/onnxruntime_cxx_inline.h | 8 ++++ onnxruntime/lora/lora_adapters.cc | 8 ++++ onnxruntime/lora/lora_adapters.h | 43 ++++++++++++++++++- onnxruntime/lora/lora_format_utils.cc | 32 ++++++++------ onnxruntime/lora/lora_format_utils.h | 22 ++++++++-- 6 files changed, 107 insertions(+), 18 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 29a229f427163..b8f61d2e3d22f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ 
b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -508,6 +508,7 @@ ORT_DEFINE_RELEASE(CustomOpDomain); ORT_DEFINE_RELEASE(ThreadingOptions); ORT_DEFINE_RELEASE(Env); ORT_DEFINE_RELEASE(RunOptions); +ORT_DEFINE_RELEASE(LoraAdapter); ORT_DEFINE_RELEASE(Session); ORT_DEFINE_RELEASE(SessionOptions); ORT_DEFINE_RELEASE(TensorTypeAndShapeInfo); @@ -736,6 +737,15 @@ struct CustomOpDomain : detail::Base { void Add(const OrtCustomOp* op); ///< Wraps CustomOpDomain_Add }; +/// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file +struct LoraAdapter : detail::Base { + /// \brief Wraps OrtApi::CreateLoraAdapter + /// + /// The function attempts to load the adapter from the specified file + /// \param absolute_adapter_path The absolute path to the Lora adapter + explicit LoraAdapter(const std::basic_string& absolute_adapter_path); +}; + /** \brief RunOptions * */ @@ -766,6 +776,8 @@ struct RunOptions : detail::Base { * Wraps OrtApi::RunOptionsUnsetTerminate */ RunOptions& UnsetTerminate(); + + RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); }; namespace detail { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index d3a8cade4d28f..f7290aa610ff4 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,6 +557,10 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } +inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path) { + ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), &p_)); +} + inline RunOptions::RunOptions() { ThrowOnError(GetApi().CreateRunOptions(&p_)); } @@ -609,6 +613,10 @@ inline RunOptions& RunOptions::UnsetTerminate() { return *this; } +inline RunOptions& RunOptions::SetLoraAdapterActive(const LoraAdapter& adapter) { + 
ThrowOnError(GetApi().RunOptionsSetActiveLoraAdapter(p_, adapter)); +} + namespace detail { template diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index e10a577076547..d1b3a56413bee 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -40,5 +40,13 @@ size_t BinaryFormatHolder::GetSize() const { } // namespace details +void LoraAdapter::Load(const std::filesystem::path& file_path) { + binary_format_holder_.Load(file_path); +} + +void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { + binary_format_holder_.MemoryMap(file_path); +} + } // namespace lora } // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 541ae59fbe293..bb887bbe04b85 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -3,9 +3,12 @@ #pragma once +#include "core/common/inlined_containers.h" #include "core/platform/env.h" #include "core/framework/ort_value.h" +#include "lora/lora_format_utils.h" + #include #include #include @@ -43,7 +46,7 @@ class BinaryFormatHolder { void MemoryMap(const std::filesystem::path& file_path); // Get Flatbuffer object pointer - const Adapter* GetParameters() const noexcept { return adapter_; } + const Adapter* GetBinaryAdapter() const noexcept { return adapter_; } // Get the size of the buffer size_t GetSize() const; @@ -62,7 +65,7 @@ class BinaryFormatHolder { }; std::variant buffer_; - const Adapter* adapter_; + const Adapter* adapter_{nullptr}; }; /// @@ -78,5 +81,41 @@ struct LoraParam { } // namespace details +/// +/// Container to hold and access Lora Parameters +/// +class LoraAdapter { + public: + LoraAdapter() = default; + LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter& operator=(const LoraAdapter&) = delete; + ~LoraAdapter() = default; + + LoraAdapter(LoraAdapter&&) = default; + LoraAdapter& operator=(LoraAdapter&&) = default; + + /// + /// Load 
parameters into memory from an adapter file and validates its format. + /// + /// file name that can be opened + void Load(const std::filesystem::path& file_path); + + /// + /// Memory maps adapter file into memory and validates its format. + /// + /// + void MemoryMap(const std::filesystem::path& file_path); + + template + void OutputAdaptersParameters(NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* adapter = binary_format_holder_.GetBinaryAdapter(); + utils::OutputAdaptersParameters(*adapter, names_out, params_out); + } + + private: + details::BinaryFormatHolder binary_format_holder_; +}; + } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 1e2ce1d58d0fe..b9710af4a570e 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -6,6 +6,10 @@ #include "core/common/common.h" #include "core/common/span_utils.h" +#include "core/framework/ortdevice.h" +#include "core/framework/ortmemoryinfo.h" +#include "core/framework/ort_value.h" +#include "core/framework/tensor.h" #include @@ -89,23 +93,25 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); } -// std::pair CreateOrtValueOverFlatBufferLoraParameter( -// const Parameter& tensor) { -// std::string name; -// LoadStringFromLoraFormat(name, tensor.name()); +std::pair CreateOrtValueOverLoraParameter(const Parameter& param) { + OrtValue result; -// const auto data_type = tensor.data_type(); + std::string name; + LoadStringFromLoraFormat(name, param.name()); -// gsl::span shape_span(tensor.dims()->data(), tensor.dims()->size()); + const auto data_type = param.data_type(); + gsl::span shape_span(param.dims()->data(), param.dims()->size()); -// auto mem_info = OrtMemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); -// auto ort_value = -// 
OrtValue::CreateTensor(*mem_info, const_cast(tensor.raw_data()->data()), -// static_cast(tensor.raw_data()->size()), shape_span, -// static_cast(data_type)); -// return std::make_pair(std::move(name), std::move(ort_value)); -// } + OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); + Tensor::InitOrtValue(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), + TensorShape(shape_span), + const_cast(param.raw_data()->data()), + cpu_meminfo, + result); + + return std::make_pair(std::move(name), std::move(result)); +} } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 75b499eb0bef9..c6d8efcab80c9 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -16,6 +16,8 @@ #include #include +struct OrtValue; + namespace onnxruntime { namespace lora { namespace utils { @@ -80,8 +82,22 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// /// /// -// std::pair CreateOrtValueOverFlatBufferLoraParameter( -// const Generators::lora_parameters::Param& tensor); +std::pair CreateOrtValueOverLoraParameter(const Parameter& param); + +template +void OutputAdaptersParameters(const Adapter& adapter, + NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* params = adapter.parameters(); + for (const auto* param : params) { + auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); + *names_out = std::move(name); + ++names_out; + *params_out = std::move(ort_value); + ++params_out; + } +} + } // namespace utils } // namespace lora -} // namespace Generators +} // namespace onnxruntime From 2c29f64c0cbf74e55d8c61f2395ffbacd6ab2a9b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 3 Sep 2024 14:34:49 -0700 Subject: [PATCH 05/84] Implement LoraAdapter and public APIs --- .../onnxruntime/core/framework/run_options.h | 6 +- 
onnxruntime/core/framework/run_options.cc | 24 +++-- onnxruntime/lora/lora_adapters.cc | 44 ++++---- onnxruntime/lora/lora_adapters.h | 101 +++++++----------- 4 files changed, 83 insertions(+), 92 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index ed506ed815dec..aa741af0f1643 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -9,7 +9,9 @@ #include "core/framework/config_options.h" namespace onnxruntime { -struct OrtLoraAdapter; +namespace lora { +class LoraAdapter; +} } // namespace onnxruntime /** @@ -44,7 +46,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - std::vector active_adapters_; + std::vector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 65f36f07c4847..00e2a17d60df5 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -64,18 +64,26 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, return onnxruntime::ToOrtStatus(options->config_options.AddConfigEntry(config_key, config_value)); } -ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions*, const _In_ OrtLoraAdapter*) { - // Need cast to the real type - // options->active_adapters_.push_back(adapter); +ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, + const _In_ OrtLoraAdapter* adapter) { + API_IMPL_BEGIN + auto* lora_adapter = reinterpret_cast(adapter); + options->active_adapters_.push_back(lora_adapter); return nullptr; + API_IMPL_END } -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T*, _Outptr_ OrtLoraAdapter**) { - //auto adapter = new 
onnxruntime::OrtLoraAdapter(adapter_file_path); - // *out = adapter.release(); +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, + _Outptr_ OrtLoraAdapter** adapter) { + API_IMPL_BEGIN + auto lora_adapter = std::make_unique(); + // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) + lora_adapter->Load(adapter_file_path); + *adapter = reinterpret_cast(lora_adapter.release()); return nullptr; + API_IMPL_END } -ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*) { - // delete reinterpret_cast(adapter); +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { + delete reinterpret_cast(adapter); } diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index d1b3a56413bee..2db4eff754428 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -9,43 +9,49 @@ namespace onnxruntime { namespace lora { -namespace details { -LoraParam::LoraParam(std::string name, OrtValue ort_value) +LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept : name_(std::move(name)), ort_value_(std::move(ort_value)) {} -BinaryFormatHolder::~BinaryFormatHolder() = default; - -void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { +void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); + + InitializeParamsValues(); } -void BinaryFormatHolder::MemoryMap(const std::filesystem::path& file_path) { +void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); buffer_.emplace(std::move(mapped_memory), 
file_size); -} -size_t BinaryFormatHolder::GetSize() const { - if (std::holds_alternative(buffer_)) { - return std::get<0>(buffer_).file_size_; - } else if (std::holds_alternative(buffer_)) { - return std::get<1>(buffer_).buffer_.size(); - } - ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); + InitializeParamsValues(); } -} // namespace details +void LoraAdapter::InitializeParamsValues() { + if (adapter_ == nullptr) { + ORT_THROW("Adapter is not loaded yet."); + } -void LoraAdapter::Load(const std::filesystem::path& file_path) { - binary_format_holder_.Load(file_path); + const auto* params = adapter_->parameters(); + InlinedHashMap params_values; + params_values.reserve(params->size()); + for (const auto* param : *params) { + auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); + params_values.emplace(name, LoraParam(std::move(name), std::move(ort_value))); + } + params_values_.swap(params_values); } -void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { - binary_format_holder_.MemoryMap(file_path); +size_t LoraAdapter::GetSize() const { + if (std::holds_alternative(buffer_)) { + return std::get<1>(buffer_).file_size_; + } else if (std::holds_alternative(buffer_)) { + return std::get<2>(buffer_).buffer_.size(); + } + ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); } } // namespace lora diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index bb887bbe04b85..066f066ed39f7 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -3,6 +3,7 @@ #pragma once +#include "core/common/common.h" #include "core/common/inlined_containers.h" #include "core/platform/env.h" #include "core/framework/ort_value.h" @@ -17,24 +18,20 @@ namespace onnxruntime { namespace lora { -struct Adapter; - -namespace details { -// This class takes hold of the serialized parameters that -// are either loaded from disk or mapped from disk (coming in the future) 
-// This data is always in host memory. -class BinaryFormatHolder { +/// +/// Container to hold and access Lora Parameters +/// +class LoraAdapter { public: - BinaryFormatHolder() = default; - BinaryFormatHolder(const BinaryFormatHolder&) = delete; - BinaryFormatHolder& operator=(const BinaryFormatHolder&) = delete; - ~BinaryFormatHolder(); + LoraAdapter() = default; + ~LoraAdapter() = default; + ORT_DISALLOW_COPY_AND_ASSIGNMENT(LoraAdapter); - BinaryFormatHolder(BinaryFormatHolder&&) = default; - BinaryFormatHolder& operator=(BinaryFormatHolder&&) = default; + LoraAdapter(LoraAdapter&&) = default; + LoraAdapter& operator=(LoraAdapter&&) = default; /// - /// Load parameters from an adapter file and validates its format. + /// Load parameters into memory from an adapter file and validates its format. /// /// file name that can be opened void Load(const std::filesystem::path& file_path); @@ -45,13 +42,27 @@ class BinaryFormatHolder { /// void MemoryMap(const std::filesystem::path& file_path); - // Get Flatbuffer object pointer - const Adapter* GetBinaryAdapter() const noexcept { return adapter_; } + /// + /// Outputs the names and tensor values of the parameters to the + /// specified output iterators + /// + /// output iterator accepting const char* + /// Output Iterator accepting OrtValue + /// + /// + template + void OutputAdaptersParameters(NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* adapter = binary_format_holder_.GetBinaryAdapter(); + // utils::OutputAdaptersParameters(*adapter, names_out, params_out); + } + + private: + void InitializeParamsValues(); // Get the size of the buffer size_t GetSize() const; - private: struct BufferHolder { explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} std::vector buffer_; @@ -64,57 +75,21 @@ class BinaryFormatHolder { size_t file_size_; }; - std::variant buffer_; - const Adapter* adapter_{nullptr}; -}; - -/// -/// Represents a named lora parameter (tensor) -/// -struct 
LoraParam { - LoraParam() = default; - LoraParam(std::string name, OrtValue parameter); - - std::string name_; - OrtValue ort_value_; -}; - -} // namespace details + std::variant buffer_; /// -/// Container to hold and access Lora Parameters -/// -class LoraAdapter { - public: - LoraAdapter() = default; - LoraAdapter(const LoraAdapter&) = delete; - LoraAdapter& operator=(const LoraAdapter&) = delete; - ~LoraAdapter() = default; - - LoraAdapter(LoraAdapter&&) = default; - LoraAdapter& operator=(LoraAdapter&&) = default; - - /// - /// Load parameters into memory from an adapter file and validates its format. - /// - /// file name that can be opened - void Load(const std::filesystem::path& file_path); - - /// - /// Memory maps adapter file into memory and validates its format. + /// Represents a named lora parameter (tensor) /// - /// - void MemoryMap(const std::filesystem::path& file_path); + struct LoraParam { + LoraParam() = default; + LoraParam(std::string name, OrtValue parameter) noexcept; - template - void OutputAdaptersParameters(NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* adapter = binary_format_holder_.GetBinaryAdapter(); - utils::OutputAdaptersParameters(*adapter, names_out, params_out); - } + std::string name_; + OrtValue ort_value_; + }; - private: - details::BinaryFormatHolder binary_format_holder_; + const Adapter* adapter_{nullptr}; + InlinedHashMap params_values_; }; } // namespace lora From aec23455076f021c6b1f5dda95508b044c894828 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 3 Sep 2024 15:36:29 -0700 Subject: [PATCH 06/84] Move Release to create --- cmake/onnxruntime.cmake | 1 + onnxruntime/core/framework/run_options.cc | 15 --------------- onnxruntime/lora/lora_adapters.cc | 21 ++++++++++++++++++++- onnxruntime/lora/lora_adapters.h | 3 ++- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 927b4ac84b037..a946f17e2dfc2 100644 --- 
a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -207,6 +207,7 @@ set(onnxruntime_INTERNAL_LIBRARIES onnxruntime_optimizer onnxruntime_providers ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_graph onnxruntime_util diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 00e2a17d60df5..b0ea7f7c9d843 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -72,18 +72,3 @@ ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptio return nullptr; API_IMPL_END } - -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, - _Outptr_ OrtLoraAdapter** adapter) { - API_IMPL_BEGIN - auto lora_adapter = std::make_unique(); - // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) - lora_adapter->Load(adapter_file_path); - *adapter = reinterpret_cast(lora_adapter.release()); - return nullptr; - API_IMPL_END -} - -ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { - delete reinterpret_cast(adapter); -} diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 2db4eff754428..d089d7b48aa14 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -4,6 +4,10 @@ #include "lora_adapters.h" #include "lora_format_utils.h" +#include "core/session/onnxruntime_c_api.h" +#include "core/session/ort_apis.h" +#include "core/framework/error_code_helper.h" + #include #include @@ -55,4 +59,19 @@ size_t LoraAdapter::GetSize() const { } } // namespace lora -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime + +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, + _Outptr_ OrtLoraAdapter** adapter) { + API_IMPL_BEGIN + auto lora_adapter = std::make_unique(); + // For platforms that do not support Memmap, we can #ifdef 
it to ->Load(adapter_file_path) + lora_adapter->Load(adapter_file_path); + *adapter = reinterpret_cast(lora_adapter.release()); + return nullptr; + API_IMPL_END +} + +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { + delete reinterpret_cast(adapter); +} diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 066f066ed39f7..c373f7cbf6dbe 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,7 +25,8 @@ class LoraAdapter { public: LoraAdapter() = default; ~LoraAdapter() = default; - ORT_DISALLOW_COPY_AND_ASSIGNMENT(LoraAdapter); + LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter& operator=(const LoraAdapter&) = delete; LoraAdapter(LoraAdapter&&) = default; LoraAdapter& operator=(LoraAdapter&&) = default; From 7bf148df19989688c885c21df54f5607b26f177d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:38:20 -0700 Subject: [PATCH 07/84] Implement unit test --- cmake/onnxruntime_unittests.cmake | 7 +- onnxruntime/lora/lora_adapters.cc | 2 +- onnxruntime/lora/lora_adapters.h | 38 ++++--- onnxruntime/lora/lora_format_utils.h | 14 --- onnxruntime/test/lora/lora_test.cc | 105 ++++++++++++++++++ .../testdata/lora/lora_unit_test_adapter.fb | Bin 0 -> 432 bytes 6 files changed, 137 insertions(+), 29 deletions(-) create mode 100644 onnxruntime/test/lora/lora_test.cc create mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 87a698af616d0..825635e5415d1 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -263,6 +263,11 @@ file(GLOB onnxruntime_test_flatbuffers_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/flatbuffers/*.h" ) +file(GLOB onnxruntime_test_lora_src CONFIGURE_DEPENDS + "${TEST_SRC_DIR}/lora/*.cc" + "${TEST_SRC_DIR}/lora/*.h" +) + if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) 
file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS @@ -783,7 +788,7 @@ endif() set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantization_src} - ${onnxruntime_test_flatbuffers_src}) + ${onnxruntime_test_flatbuffers_src} ${onnxruntime_test_lora_src}) if (onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS) file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index d089d7b48aa14..e2214a15d2563 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -49,7 +49,7 @@ void LoraAdapter::InitializeParamsValues() { params_values_.swap(params_values); } -size_t LoraAdapter::GetSize() const { +size_t LoraAdapter::GetBufferSize() const { if (std::holds_alternative(buffer_)) { return std::get<1>(buffer_).file_size_; } else if (std::holds_alternative(buffer_)) { diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index c373f7cbf6dbe..a943e92175030 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,7 +25,7 @@ class LoraAdapter { public: LoraAdapter() = default; ~LoraAdapter() = default; - LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter(const LoraAdapter&) = delete; LoraAdapter& operator=(const LoraAdapter&) = delete; LoraAdapter(LoraAdapter&&) = default; @@ -44,25 +44,37 @@ class LoraAdapter { void MemoryMap(const std::filesystem::path& file_path); /// - /// Outputs the names and tensor values of the parameters to the - /// specified output iterators + /// Returns number of parameters in the adapter. + /// The number is expected to be even as lora params come in pairs. 
/// - /// output iterator accepting const char* - /// Output Iterator accepting OrtValue - /// - /// + /// size of params_values_ container + size_t GetParamNum() const { + return params_values_.size(); + } + + /// + /// Outputs Lora Parameters, their names and values + /// into the supplied output iterators. + /// + /// + /// + /// output iterator that accepts const char* + /// output iterator that accepts OrtValue template void OutputAdaptersParameters(NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* adapter = binary_format_holder_.GetBinaryAdapter(); - // utils::OutputAdaptersParameters(*adapter, names_out, params_out); + TensorOutputIter tensor_out) const { + for (const auto& [name, param] : params_values_) { + *names_out = name.c_str(); + ++names_out; + *tensor_out = param.ort_value_; + ++tensor_out; + } } private: - void InitializeParamsValues(); // Get the size of the buffer - size_t GetSize() const; + size_t GetBufferSize() const; struct BufferHolder { explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} @@ -78,7 +90,7 @@ class LoraAdapter { std::variant buffer_; -/// + /// /// Represents a named lora parameter (tensor) /// struct LoraParam { diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index c6d8efcab80c9..e5587fd730925 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -84,20 +84,6 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// std::pair CreateOrtValueOverLoraParameter(const Parameter& param); -template -void OutputAdaptersParameters(const Adapter& adapter, - NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* params = adapter.parameters(); - for (const auto* param : params) { - auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); - *names_out = std::move(name); - ++names_out; - *params_out = std::move(ort_value); - ++params_out; - } -} - } 
// namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc new file mode 100644 index 0000000000000..832056f35870e --- /dev/null +++ b/onnxruntime/test/lora/lora_test.cc @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "lora/lora_adapters.h" +#include "lora/lora_format_version.h" +#include "lora/lora_format_utils.h" +#include "gtest/gtest.h" + +#include + +namespace onnxruntime { +namespace test { + +// TEST(LoraFormatTest, CreateAdapter) { +// // Generate a random sequence of floats +// // shape = {8, 4} +// constexpr std::array shape = {8, 4}; +// std::vector param_1(32); +// std::iota(param_1.begin(), param_1.end(), 0.0f); +// +// std::vector param_2(32); +// std::iota(param_2.begin(), param_2.end(), 33.0f); +// +// flatbuffers::FlatBufferBuilder builder; +// std::vector> params; +// params.reserve(2); +// flatbuffers::Offset fbs_param_1, fbs_param_2; +// auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); +// lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType_FLOAT, shape, +// byte_span, fbs_param_1); +// params.push_back(fbs_param_1); +// +// byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); +// lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType_FLOAT, shape, +// byte_span, fbs_param_2); +// params.push_back(fbs_param_2); +// +// auto fbs_params = builder.CreateVector(params); +// auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); +// builder.Finish(fbs_adapter, lora::AdapterIdentifier()); +// +// constexpr const char* const file_name = +// "D:/dmitrism/Downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; +// std::ofstream file(file_name, std::ios::binary); +// ASSERT_TRUE(file.is_open()); +// +// ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), 
builder.GetSize()).fail()); +// ASSERT_FALSE(file.flush().fail()); +// file.close(); +// } + +TEST(LoraAdapterTest, Load) { + // XXX: put this into test directory + const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; + + auto verify_load = [](const lora::LoraAdapter& adapter) { + const auto param_num = adapter.GetParamNum(); + ASSERT_GE(param_num, 0U); + + std::vector names; + std::vector ort_values; + names.reserve(param_num); + ort_values.reserve(param_num); + + adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); + ASSERT_EQ(param_num, names.size()); + ASSERT_EQ(param_num, ort_values.size()); + + for (size_t i = 0; i < param_num; ++i) { + const auto& name = names[i]; + const auto& ort_value = ort_values[i]; + ASSERT_TRUE(name != nullptr); + ASSERT_TRUE(ort_value.IsTensor()); + + const auto& tensor = ort_value.Get(); + ASSERT_EQ(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT); + + const auto shape = tensor.Shape().GetDims(); + ASSERT_EQ(2, shape.size()); + ASSERT_EQ(8, shape[0]); + ASSERT_EQ(4, shape[1]); + + // Read all the elements to make sure they are accessible + const auto data = tensor.DataAsSpan(); + for (size_t j = 0, lim = data.size(); j < lim; ++j) { + ASSERT_EQ(static_cast(j), data[j]); + } + } + }; + + { + lora::LoraAdapter lora_adapter; + lora_adapter.Load(file_path); + verify_load(lora_adapter); + } + + { + lora::LoraAdapter lora_adapter; + lora_adapter.MemoryMap(file_path); + verify_load(lora_adapter); + } +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb new file mode 100644 index 0000000000000000000000000000000000000000..af696646efe6704d8568af6ec7a2cd7668906c4f GIT binary patch literal 432 zcmaLSEl2}l7zgmjL-4|dg$09QVZmUSVlW8a#z2LI1%p9w1A{@~5QD*R(YR<_G#HG= zMdPAz(P*@tzw@JsaKC&0ydS*JlSRaS_i&&@yQ-=s_1ChTFsIIa`;Y2``b@o6BK0Ct 
zG8!~lWsMeX*4d!LHaqN+)8~KzLykD+gb`<)b3ws1H{4S85*?&peSG)y6Jtdv##iIZ z)2+y&sozv>5b^gM_04k6FZWS0WyXT&nvWJ8a)yj3D7j<8JyRZd;+YvQ%z0(O8}EE{ L6A%19$7Xy2-Xc2# literal 0 HcmV?d00001 From 9a8f4587bee769461eb920cee3670a5e7de6eb5d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:42:45 -0700 Subject: [PATCH 08/84] Add test data creation code --- onnxruntime/test/lora/lora_test.cc | 40 +-------------- .../testdata/lora/lora_unit_test_adapter.cc | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+), 39 deletions(-) create mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 832056f35870e..96dce52a90652 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -11,46 +11,8 @@ namespace onnxruntime { namespace test { -// TEST(LoraFormatTest, CreateAdapter) { -// // Generate a random sequence of floats -// // shape = {8, 4} -// constexpr std::array shape = {8, 4}; -// std::vector param_1(32); -// std::iota(param_1.begin(), param_1.end(), 0.0f); -// -// std::vector param_2(32); -// std::iota(param_2.begin(), param_2.end(), 33.0f); -// -// flatbuffers::FlatBufferBuilder builder; -// std::vector> params; -// params.reserve(2); -// flatbuffers::Offset fbs_param_1, fbs_param_2; -// auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); -// lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType_FLOAT, shape, -// byte_span, fbs_param_1); -// params.push_back(fbs_param_1); -// -// byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); -// lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType_FLOAT, shape, -// byte_span, fbs_param_2); -// params.push_back(fbs_param_2); -// -// auto fbs_params = builder.CreateVector(params); -// auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); -// builder.Finish(fbs_adapter, lora::AdapterIdentifier()); -// -// 
constexpr const char* const file_name = -// "D:/dmitrism/Downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; -// std::ofstream file(file_name, std::ios::binary); -// ASSERT_TRUE(file.is_open()); -// -// ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()).fail()); -// ASSERT_FALSE(file.flush().fail()); -// file.close(); -// } - TEST(LoraAdapterTest, Load) { - // XXX: put this into test directory + // See file creation code at testdata/lora/lora_unit_test_adapter.cc const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; auto verify_load = [](const lora::LoraAdapter& adapter) { diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc new file mode 100644 index 0000000000000..1bd714de9ba85 --- /dev/null +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "lora/lora_format_version.h" +#include "lora/lora_format_utils.h" +#include "gtest/gtest.h" + +#include + +namespace onnxruntime { +namespace test { + +TEST(LoraFormatTest, CreateAdapter) { + // generate a random sequence of floats + // shape = {8, 4} + constexpr std::array shape = {8, 4}; + std::vector param_1(32); + std::iota(param_1.begin(), param_1.end(), 0.0f); + + std::vector param_2(32); + std::iota(param_2.begin(), param_2.end(), 33.0f); + + flatbuffers::flatbufferbuilder builder; + std::vector> params; + params.reserve(2); + flatbuffers::offset fbs_param_1, fbs_param_2; + auto byte_span = reinterpretasspan(gsl::make_span(param_1)); + lora::utils::saveloraparameter(builder, "param_1", lora::tensordatatype_float, shape, + byte_span, fbs_param_1); + params.push_back(fbs_param_1); + + byte_span = reinterpretasspan(gsl::make_span(param_2)); + lora::utils::saveloraparameter(builder, "param_2", lora::tensordatatype_float, shape, + byte_span, fbs_param_2); + params.push_back(fbs_param_2); + + auto fbs_params = builder.createvector(params); + auto fbs_adapter = lora::createadapter(builder, lora::kloraformatversion, 1, 1, fbs_params); + builder.finish(fbs_adapter, lora::adapteridentifier()); + + constexpr const char* const file_name = + "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; + std::ofstream file(file_name, std::ios::binary); + assert_true(file.is_open()); + + assert_false(file.write(reinterpret_cast(builder.getbufferpointer()), builder.getsize()).fail()); + assert_false(file.flush().fail()); + file.close(); +} +} +} \ No newline at end of file From d0d71c46b51a727cef0c410a45e6200dbab64eb2 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:45:40 -0700 Subject: [PATCH 09/84] Use inlined vector --- onnxruntime/test/lora/lora_test.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 
96dce52a90652..e4a2b02a03bc2 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "core/common/inlined_containers_fwd.h" #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" @@ -19,8 +20,8 @@ TEST(LoraAdapterTest, Load) { const auto param_num = adapter.GetParamNum(); ASSERT_GE(param_num, 0U); - std::vector names; - std::vector ort_values; + InlinedVector names; + InlinedVector ort_values; names.reserve(param_num); ort_values.reserve(param_num); From 63a51092efaf5c70d18c755f8cab61c53a48e093 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 14:10:54 -0700 Subject: [PATCH 10/84] Add vector forced alignemtn --- onnxruntime/lora/lora_format/README.md | 2 +- onnxruntime/lora/lora_format/lora_schema.fbs | 2 +- .../lora/lora_format/lora_schema.fbs.h | 97 ++++++++++--------- onnxruntime/lora/lora_format_utils.cc | 4 +- .../testdata/lora/lora_unit_test_adapter.cc | 29 +++--- 5 files changed, 69 insertions(+), 65 deletions(-) diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md index ec39974464c1c..d28f47186cbea 100644 --- a/onnxruntime/lora/lora_format/README.md +++ b/onnxruntime/lora/lora_format/README.md @@ -20,7 +20,7 @@ It is possible to use another flatc as well, e.g., from a separate installation. To update the flatbuffers schemas and generated files: 1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). -2. Run [compile_schema.py](./compile_schema.py) to generate the C++ and Python bindings. +2. Run [compile_schema.py](./compile_schema.py) to generate the C++ bindings. 
``` python onnxruntime/lora/lora_format/compile_schema.py --flatc diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs index 073fe0945517d..37e8195dab6f2 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -37,7 +37,7 @@ table Parameter { dims:[int64]; data_type:TensorDataType; - raw_data:[uint8]; + raw_data:[uint8] (force_align : 8); } table Adapter { diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h index 72b27dd355814..a75082af811fc 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -22,55 +22,55 @@ struct ParameterBuilder; struct Adapter; struct AdapterBuilder; -enum TensorDataType : int32_t { - TensorDataType_UNDEFINED = 0, - TensorDataType_FLOAT = 1, - TensorDataType_UINT8 = 2, - TensorDataType_INT8 = 3, - TensorDataType_UINT16 = 4, - TensorDataType_INT16 = 5, - TensorDataType_INT32 = 6, - TensorDataType_INT64 = 7, - TensorDataType_STRING = 8, - TensorDataType_BOOL = 9, - TensorDataType_FLOAT16 = 10, - TensorDataType_DOUBLE = 11, - TensorDataType_UINT32 = 12, - TensorDataType_UINT64 = 13, - TensorDataType_COMPLEX64 = 14, - TensorDataType_COMPLEX128 = 15, - TensorDataType_BFLOAT16 = 16, - TensorDataType_FLOAT8E4M3FN = 17, - TensorDataType_FLOAT8E4M3FNUZ = 18, - TensorDataType_FLOAT8E5M2 = 19, - TensorDataType_FLOAT8E5M2FNUZ = 20, - TensorDataType_MIN = TensorDataType_UNDEFINED, - TensorDataType_MAX = TensorDataType_FLOAT8E5M2FNUZ +enum class TensorDataType : int32_t { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, + MIN = UNDEFINED, + MAX = 
FLOAT8E5M2FNUZ }; inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType_UNDEFINED, - TensorDataType_FLOAT, - TensorDataType_UINT8, - TensorDataType_INT8, - TensorDataType_UINT16, - TensorDataType_INT16, - TensorDataType_INT32, - TensorDataType_INT64, - TensorDataType_STRING, - TensorDataType_BOOL, - TensorDataType_FLOAT16, - TensorDataType_DOUBLE, - TensorDataType_UINT32, - TensorDataType_UINT64, - TensorDataType_COMPLEX64, - TensorDataType_COMPLEX128, - TensorDataType_BFLOAT16, - TensorDataType_FLOAT8E4M3FN, - TensorDataType_FLOAT8E4M3FNUZ, - TensorDataType_FLOAT8E5M2, - TensorDataType_FLOAT8E5M2FNUZ + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ }; return values; } @@ -104,7 +104,7 @@ inline const char * const *EnumNamesTensorDataType() { } inline const char *EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType_UNDEFINED, TensorDataType_FLOAT8E5M2FNUZ)) return ""; + if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; } @@ -173,7 +173,7 @@ inline ::flatbuffers::Offset CreateParameter( ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora::TensorDataType data_type = 
onnxruntime::lora::TensorDataType_UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { ParameterBuilder builder_(_fbb); builder_.add_raw_data(raw_data); @@ -187,10 +187,11 @@ inline ::flatbuffers::Offset CreateParameterDirect( ::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, const std::vector *dims = nullptr, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, const std::vector *raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; + if (raw_data) { _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); } auto raw_data__ = raw_data ? _fbb.CreateVector(*raw_data) : 0; return onnxruntime::lora::CreateParameter( _fbb, diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index b9710af4a570e..75604dd62cf4d 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -104,7 +104,9 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); - Tensor::InitOrtValue(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), + auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); + // const_cast is necessery due to Tensor class API + Tensor::InitOrtValue(elem_type, TensorShape(shape_span), const_cast(param.raw_data()->data()), cpu_meminfo, diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc index 1bd714de9ba85..a72d2a77c4325 100644 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -10,7 
+10,7 @@ namespace onnxruntime { namespace test { -TEST(LoraFormatTest, CreateAdapter) { +EST(LoraFormatTest, CreateAdapter) { // generate a random sequence of floats // shape = {8, 4} constexpr std::array shape = {8, 4}; @@ -20,32 +20,33 @@ TEST(LoraFormatTest, CreateAdapter) { std::vector param_2(32); std::iota(param_2.begin(), param_2.end(), 33.0f); - flatbuffers::flatbufferbuilder builder; - std::vector> params; + flatbuffers::FlatBufferBuilder builder; + std::vector> params; params.reserve(2); - flatbuffers::offset fbs_param_1, fbs_param_2; - auto byte_span = reinterpretasspan(gsl::make_span(param_1)); - lora::utils::saveloraparameter(builder, "param_1", lora::tensordatatype_float, shape, + flatbuffers::Offset fbs_param_1, fbs_param_2; + auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); + lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType::FLOAT, shape, byte_span, fbs_param_1); params.push_back(fbs_param_1); - byte_span = reinterpretasspan(gsl::make_span(param_2)); - lora::utils::saveloraparameter(builder, "param_2", lora::tensordatatype_float, shape, + byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); + lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType::FLOAT, shape, byte_span, fbs_param_2); params.push_back(fbs_param_2); - auto fbs_params = builder.createvector(params); - auto fbs_adapter = lora::createadapter(builder, lora::kloraformatversion, 1, 1, fbs_params); - builder.finish(fbs_adapter, lora::adapteridentifier()); + auto fbs_params = builder.CreateVector(params); + auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); + builder.Finish(fbs_adapter, lora::AdapterIdentifier()); constexpr const char* const file_name = "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; std::ofstream file(file_name, std::ios::binary); - assert_true(file.is_open()); + ASSERT_TRUE(file.is_open()); - 
assert_false(file.write(reinterpret_cast(builder.getbufferpointer()), builder.getsize()).fail()); - assert_false(file.flush().fail()); + ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()).fail()); + ASSERT_FALSE(file.flush().fail()); file.close(); } + } } \ No newline at end of file From 58a8c3e5f090de9e58c782aadcab01f2d3eebe66 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 11:30:28 -0700 Subject: [PATCH 11/84] Add Load --- onnxruntime/lora/lora_adapters.cc | 5 +- onnxruntime/lora/lora_adapters.h | 30 ++++ onnxruntime/test/lora/lora_test.cc | 151 +++++++++++++----- .../testdata/lora/lora_unit_test_adapter.cc | 4 + 4 files changed, 153 insertions(+), 37 deletions(-) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index e2214a15d2563..45b27365a116f 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -19,9 +19,12 @@ LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); + Load(std::move(buffer)); +} + +void LoraAdapter::Load(std::vector buffer) { adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); - InitializeParamsValues(); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index a943e92175030..45519e4081cd3 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -37,6 +37,12 @@ class LoraAdapter { /// file name that can be opened void Load(const std::filesystem::path& file_path); + /// + /// Load parameters from serialized bytes and validates its format. + /// + /// + void Load(std::vector buffer); + /// /// Memory maps adapter file into memory and validates its format. 
/// @@ -52,6 +58,30 @@ class LoraAdapter { return params_values_.size(); } + /// + /// Gets lora format version + /// + /// + int LoraFormatVersion() const noexcept { + return adapter_->format_version(); + } + + /// + /// Gets adapter version + /// + /// + int AdapterVersion() const noexcept { + return adapter_->adapter_version(); + } + + /// + /// Gets model version for which the adapter was created + /// + /// + int ModelVersion() const noexcept { + return adapter_->model_version(); + } + /// /// Outputs Lora Parameters, their names and values /// into the supplied output iterators. diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index e4a2b02a03bc2..7a62940820618 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -2,62 +2,141 @@ // Licensed under the MIT License. #include "core/common/inlined_containers_fwd.h" +#include "core/framework/data_types_internal.h" #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" #include "gtest/gtest.h" -#include +#include namespace onnxruntime { namespace test { +namespace { + +constexpr const int kAdapterVersion = 1; +constexpr const int kModelVersion = 1; + +template +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(std::isnan(data[i])); + ASSERT_TRUE(std::isfinite(data[i])); + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + 
ASSERT_FALSE(std::isnan(data[i])); + ASSERT_TRUE(std::isfinite(data[i])); + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + + +template<> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(data[i].IsNaN()); + ASSERT_FALSE(data[i].IsInfinity()); + ASSERT_EQ(static_cast(i), data[i].ToFloat()); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(data[i].IsNaN()); + ASSERT_FALSE(data[i].IsInfinity()); + ASSERT_EQ(static_cast(i), data[i].ToFloat()); + } + } +}; + +auto verify_load = [](const lora::LoraAdapter& adapter) { + ASSERT_EQ(kAdapterVersion, adapter.AdapterVersion()); + ASSERT_EQ(kModelVersion, adapter.ModelVersion()); + + const auto param_num = adapter.GetParamNum(); + ASSERT_GE(param_num, 0U); + + InlinedVector names; + InlinedVector ort_values; + names.reserve(param_num); + ort_values.reserve(param_num); + + adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); + ASSERT_EQ(param_num, names.size()); + ASSERT_EQ(param_num, ort_values.size()); + + for (size_t i = 0; i < param_num; ++i) { + const auto& name = names[i]; + const auto& ort_value = ort_values[i]; + ASSERT_TRUE(name != nullptr); + ASSERT_TRUE(ort_value.IsTensor()); + + const auto& tensor = ort_value.Get(); + ASSERT_NE(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); + + const auto shape = tensor.Shape().GetDims(); + ASSERT_EQ(2, shape.size()); + ASSERT_EQ(8, shape[0]); + ASSERT_EQ(4, shape[1]); + + // Read all the elements to make sure they are accessible + // only on CPU + const auto& mem_info = tensor.Location(); + if (mem_info.device.Type() == OrtDevice::CPU) { + utils::MLTypeCallDispatcher + disp(tensor.GetElementType()); + 
disp.Invoke(tensor); + } + } +}; + +} // namespace + TEST(LoraAdapterTest, Load) { // See file creation code at testdata/lora/lora_unit_test_adapter.cc + // This is float const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; - auto verify_load = [](const lora::LoraAdapter& adapter) { - const auto param_num = adapter.GetParamNum(); - ASSERT_GE(param_num, 0U); - - InlinedVector names; - InlinedVector ort_values; - names.reserve(param_num); - ort_values.reserve(param_num); - - adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); - ASSERT_EQ(param_num, names.size()); - ASSERT_EQ(param_num, ort_values.size()); - - for (size_t i = 0; i < param_num; ++i) { - const auto& name = names[i]; - const auto& ort_value = ort_values[i]; - ASSERT_TRUE(name != nullptr); - ASSERT_TRUE(ort_value.IsTensor()); - - const auto& tensor = ort_value.Get(); - ASSERT_EQ(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT); - - const auto shape = tensor.Shape().GetDims(); - ASSERT_EQ(2, shape.size()); - ASSERT_EQ(8, shape[0]); - ASSERT_EQ(4, shape[1]); - - // Read all the elements to make sure they are accessible - const auto data = tensor.DataAsSpan(); - for (size_t j = 0, lim = data.size(); j < lim; ++j) { - ASSERT_EQ(static_cast(j), data[j]); - } - } - }; - { + // Test memory load lora::LoraAdapter lora_adapter; lora_adapter.Load(file_path); verify_load(lora_adapter); } { + // Test memory map lora::LoraAdapter lora_adapter; lora_adapter.MemoryMap(file_path); verify_load(lora_adapter); diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc index a72d2a77c4325..b185ba676cb73 100644 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -10,6 +10,10 @@ namespace onnxruntime { namespace test { +constexpr const int kAdapterVersion = 1; +constexpr const int kModelVersion = 1; 
+ + EST(LoraFormatTest, CreateAdapter) { // generate a random sequence of floats // shape = {8, 4} From 3e969b361b9076c51e1ae61e956d145d3e01b673 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 13:41:18 -0700 Subject: [PATCH 12/84] Make test in memory --- onnxruntime/test/lora/lora_test.cc | 103 ++++++++++++++---- .../testdata/lora/lora_unit_test_adapter.cc | 56 ---------- .../testdata/lora/lora_unit_test_adapter.fb | Bin 432 -> 0 bytes 3 files changed, 83 insertions(+), 76 deletions(-) delete mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc delete mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 7a62940820618..9015043554e80 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -3,6 +3,8 @@ #include "core/common/inlined_containers_fwd.h" #include "core/framework/data_types_internal.h" +#include "core/framework/to_tensor_proto_element_type.h" + #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" @@ -22,7 +24,7 @@ template struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_EQ(static_cast(i), data[i]); } } @@ -32,7 +34,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_FALSE(std::isnan(data[i])); ASSERT_TRUE(std::isfinite(data[i])); ASSERT_EQ(static_cast(i), data[i]); @@ -44,7 +46,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - 
for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_FALSE(std::isnan(data[i])); ASSERT_TRUE(std::isfinite(data[i])); ASSERT_EQ(static_cast(i), data[i]); @@ -52,12 +54,11 @@ struct ReadAndValidateData { } }; - -template<> +template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0].ToFloat()), size = data.size(); i < size; ++i) { ASSERT_FALSE(data[i].IsNaN()); ASSERT_FALSE(data[i].IsInfinity()); ASSERT_EQ(static_cast(i), data[i].ToFloat()); @@ -69,7 +70,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0].ToFloat()), size = data.size(); i < size; ++i) { ASSERT_FALSE(data[i].IsNaN()); ASSERT_FALSE(data[i].IsInfinity()); ASSERT_EQ(static_cast(i), data[i].ToFloat()); @@ -121,26 +122,88 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { } }; -} // namespace +constexpr const std::array param_shape = {8, 4}; -TEST(LoraAdapterTest, Load) { - // See file creation code at testdata/lora/lora_unit_test_adapter.cc - // This is float - const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; +template +struct CreateParam { + InlinedVector operator()() const { + InlinedVector param(32); + std::iota(param.begin(), param.end(), T{0}); + return param; + } +}; - { - // Test memory load - lora::LoraAdapter lora_adapter; - lora_adapter.Load(file_path); - verify_load(lora_adapter); +template +struct GenerateTestParameters { + std::vector operator()() const { + constexpr const auto data_type = utils::ToTensorProtoElementType(); + + InlinedVector param_1(32); + InlinedVector param_2(32); + if constexpr (std::is_same::value || 
std::is_same::value) { + for (float f = 0.f; f < 32; ++f) { + param_1[static_cast(f)] = static_cast(f); + param_2[static_cast(f)] = static_cast(f + 32); + } + } else { + std::iota(param_1.begin(), param_1.end(), T{0}); + std::iota(param_2.begin(), param_2.end(), T{32}); + } + + flatbuffers::FlatBufferBuilder builder; + std::vector> params; + params.reserve(2); + + flatbuffers::Offset fbs_param_1, fbs_param_2; + auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); + lora::utils::SaveLoraParameter(builder, "param_1", static_cast(data_type), param_shape, + byte_span, fbs_param_1); + params.push_back(fbs_param_1); + + byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); + lora::utils::SaveLoraParameter(builder, "param_2", static_cast(data_type), param_shape, + byte_span, fbs_param_2); + params.push_back(fbs_param_2); + + auto fbs_params = builder.CreateVector(params); + auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, kAdapterVersion, kModelVersion, + fbs_params); + builder.Finish(fbs_adapter, lora::AdapterIdentifier()); + + std::vector result; + result.reserve(builder.GetSize()); + gsl::span buffer(builder.GetBufferPointer(), builder.GetSize()); + std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); + return result; } +}; - { - // Test memory map +template +struct TestDataType { + void operator()() const { + const auto test_params = GenerateTestParameters()(); lora::LoraAdapter lora_adapter; - lora_adapter.MemoryMap(file_path); + lora_adapter.Load(std::move(test_params)); verify_load(lora_adapter); } +}; + +} // namespace + +TEST(LoraAdapterTest, Load) { + // Test different data types + const auto data_types = gsl::make_span(lora::EnumValuesTensorDataType()); + for (size_t i = 1, size = data_types.size(); i < size; ++i) { + if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) + continue; + + utils::MLTypeCallDispatcher + disp(static_cast(data_types[i])); + disp.Invoke(); + } } } // namespace test diff 
--git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc deleted file mode 100644 index b185ba676cb73..0000000000000 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "lora/lora_format_version.h" -#include "lora/lora_format_utils.h" -#include "gtest/gtest.h" - -#include - -namespace onnxruntime { -namespace test { - -constexpr const int kAdapterVersion = 1; -constexpr const int kModelVersion = 1; - - -EST(LoraFormatTest, CreateAdapter) { - // generate a random sequence of floats - // shape = {8, 4} - constexpr std::array shape = {8, 4}; - std::vector param_1(32); - std::iota(param_1.begin(), param_1.end(), 0.0f); - - std::vector param_2(32); - std::iota(param_2.begin(), param_2.end(), 33.0f); - - flatbuffers::FlatBufferBuilder builder; - std::vector> params; - params.reserve(2); - flatbuffers::Offset fbs_param_1, fbs_param_2; - auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); - lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType::FLOAT, shape, - byte_span, fbs_param_1); - params.push_back(fbs_param_1); - - byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); - lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType::FLOAT, shape, - byte_span, fbs_param_2); - params.push_back(fbs_param_2); - - auto fbs_params = builder.CreateVector(params); - auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); - builder.Finish(fbs_adapter, lora::AdapterIdentifier()); - - constexpr const char* const file_name = - "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; - std::ofstream file(file_name, std::ios::binary); - ASSERT_TRUE(file.is_open()); - - ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), 
builder.GetSize()).fail()); - ASSERT_FALSE(file.flush().fail()); - file.close(); -} - -} -} \ No newline at end of file diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb deleted file mode 100644 index af696646efe6704d8568af6ec7a2cd7668906c4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 432 zcmaLSEl2}l7zgmjL-4|dg$09QVZmUSVlW8a#z2LI1%p9w1A{@~5QD*R(YR<_G#HG= zMdPAz(P*@tzw@JsaKC&0ydS*JlSRaS_i&&@yQ-=s_1ChTFsIIa`;Y2``b@o6BK0Ct zG8!~lWsMeX*4d!LHaqN+)8~KzLykD+gb`<)b3ws1H{4S85*?&peSG)y6Jtdv##iIZ z)2+y&sozv>5b^gM_04k6FZWS0WyXT&nvWJ8a)yj3D7j<8JyRZd;+YvQ%z0(O8}EE{ L6A%19$7Xy2-Xc2# From 2c8b5e261f7583c1492c2a48bf95ae0fb32a7685 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 14:53:18 -0700 Subject: [PATCH 13/84] Fix name moving --- onnxruntime/lora/lora_adapters.cc | 7 ++++--- onnxruntime/lora/lora_adapters.h | 3 +-- onnxruntime/test/lora/lora_test.cc | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 45b27365a116f..90fae678f7681 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,8 +14,8 @@ namespace onnxruntime { namespace lora { -LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept - : name_(std::move(name)), ort_value_(std::move(ort_value)) {} +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value) noexcept + : ort_value_(std::move(ort_value)) {} void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); @@ -47,7 +47,8 @@ void LoraAdapter::InitializeParamsValues() { params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); - params_values.emplace(name, LoraParam(std::move(name), std::move(ort_value))); + LoraParam 
lora_param(std::move(ort_value)); + params_values.emplace(std::move(name), std::move(lora_param)); } params_values_.swap(params_values); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 45519e4081cd3..e90e06f526b9b 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -125,9 +125,8 @@ class LoraAdapter { /// struct LoraParam { LoraParam() = default; - LoraParam(std::string name, OrtValue parameter) noexcept; + explicit LoraParam(OrtValue parameter) noexcept; - std::string name_; OrtValue ort_value_; }; diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 9015043554e80..6393cbc697030 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -83,7 +83,7 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_EQ(kModelVersion, adapter.ModelVersion()); const auto param_num = adapter.GetParamNum(); - ASSERT_GE(param_num, 0U); + ASSERT_EQ(param_num, 2U); InlinedVector names; InlinedVector ort_values; From a2497848d0ee3a189bdbf2379a28cd1da509c8e8 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 15:09:18 -0700 Subject: [PATCH 14/84] Add OrtAllocator parameter --- .../onnxruntime/core/session/onnxruntime_c_api.h | 7 +++++-- .../onnxruntime/core/session/onnxruntime_cxx_api.h | 4 +++- .../core/session/onnxruntime_cxx_inline.h | 5 +++-- onnxruntime/core/session/ort_apis.h | 4 ++-- onnxruntime/lora/lora_adapters.cc | 10 +++++++--- onnxruntime/lora/lora_adapters.h | 14 ++++++++++---- onnxruntime/lora/lora_format_utils.cc | 12 +++++++++++- onnxruntime/lora/lora_format_utils.h | 9 +++++++++ 8 files changed, 50 insertions(+), 15 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index cffe1d8e77af5..8cefb7ef7a3fb 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ 
b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4677,16 +4677,19 @@ struct OrtApi { * the platform does not support memory mapping, in which case the file will be read into memory. * * \param[in] adapter_file_path Absolute file path to the adapter file. + * \param[in] allocator optional pointer to a device allocator. If specified + * data is copied to the device at some point before Run() is invoked, if nullptr data stays on CPU. + * The data would still be copied to device if required by the model at inference time. * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with * OrtApi::ReleaseLoraAdapter. */ - ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); + ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter */ ORT_CLASS_RELEASE(LoraAdapter); - /** \brief Set the active Lora Adapter for the run options * * The function sets the active Lora Adapter for the run options. The Lora Adapter must be created with diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index b8f61d2e3d22f..468317099cd09 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -743,7 +743,9 @@ struct LoraAdapter : detail::Base { /// /// The function attempts to load the adapter from the specified file /// \param absolute_adapter_path The absolute path to the Lora adapter - explicit LoraAdapter(const std::basic_string& absolute_adapter_path); + /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still + /// be copied to device if required by the model at inference time. 
+ explicit LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator); }; /** \brief RunOptions diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index f7290aa610ff4..805a2553a0530 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,8 +557,8 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } -inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path) { - ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), &p_)); +inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator) { + ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), allocator, &p_)); } inline RunOptions::RunOptions() { @@ -615,6 +615,7 @@ inline RunOptions& RunOptions::UnsetTerminate() { inline RunOptions& RunOptions::SetLoraAdapterActive(const LoraAdapter& adapter) { ThrowOnError(GetApi().RunOptionsSetActiveLoraAdapter(p_, adapter)); + return *this; } namespace detail { diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 8564985beb9f7..210ef607c053e 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -120,7 +120,6 @@ ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); - ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out); @@ -526,7 +525,8 @@ ORT_API_STATUS_IMPL(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* 
ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); -ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); +ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 90fae678f7681..b4c5aff90f03d 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,8 +14,12 @@ namespace onnxruntime { namespace lora { -LoraAdapter::LoraParam::LoraParam(OrtValue ort_value) noexcept - : ort_value_(std::move(ort_value)) {} +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped) noexcept + : ort_value_mapped_(std::move(ort_value_mapped)) {} + +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept + : ort_value_mapped_(std::move(ort_value_mapped)), ort_value_device_(std::move(ort_value_device)) { +} void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); @@ -65,7 +69,7 @@ size_t LoraAdapter::GetBufferSize() const { } // namespace lora } // namespace onnxruntime -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN auto lora_adapter = std::make_unique(); diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index e90e06f526b9b..865fbe3f21cf2 100644 --- a/onnxruntime/lora/lora_adapters.h +++ 
b/onnxruntime/lora/lora_adapters.h @@ -5,8 +5,9 @@ #include "core/common/common.h" #include "core/common/inlined_containers.h" -#include "core/platform/env.h" +#include "core/framework/allocator.h" #include "core/framework/ort_value.h" +#include "core/platform/env.h" #include "lora/lora_format_utils.h" @@ -24,6 +25,8 @@ namespace lora { class LoraAdapter { public: LoraAdapter() = default; + explicit LoraAdapter(AllocatorPtr device_allocator) + : device_allocator_(std::move(device_allocator)) {} ~LoraAdapter() = default; LoraAdapter(const LoraAdapter&) = delete; LoraAdapter& operator=(const LoraAdapter&) = delete; @@ -96,7 +99,7 @@ class LoraAdapter { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = param.ort_value_; + *tensor_out = param.ort_value_mapped_; ++tensor_out; } } @@ -125,11 +128,14 @@ class LoraAdapter { /// struct LoraParam { LoraParam() = default; - explicit LoraParam(OrtValue parameter) noexcept; + explicit LoraParam(OrtValue ort_value_mapped) noexcept; + LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept; - OrtValue ort_value_; + OrtValue ort_value_mapped_; + OrtValue ort_value_device_; }; + AllocatorPtr device_allocator_; const Adapter* adapter_{nullptr}; InlinedHashMap params_values_; }; diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 75604dd62cf4d..378f2833904ca 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -102,7 +102,7 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter const auto data_type = param.data_type(); gsl::span shape_span(param.dims()->data(), param.dims()->size()); - OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); + static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); // const_cast is necessery due to 
Tensor class API @@ -114,6 +114,16 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter return std::make_pair(std::move(name), std::move(result)); } + +OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { + OrtValue result; + + const auto& tensor = ort_value_mapped.Get(); + Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator); + + return result; +} + } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index e5587fd730925..3c0bede4c5f2b 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -4,6 +4,7 @@ #pragma once #include "core/common/flatbuffers.h" +#include "core/framework/allocator.h" #include "core/platform/env.h" #include @@ -84,6 +85,14 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// std::pair CreateOrtValueOverLoraParameter(const Parameter& param); +/// +/// Allocates OrtValue on specified device and copies data there +/// +/// parameter on CPU +/// supplied device allocator +/// +OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); + } // namespace utils } // namespace lora } // namespace onnxruntime From c8250422f19d29e79602e8961cdcb059c97f0fad Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 16:13:58 -0700 Subject: [PATCH 15/84] Make Run() calls Lora aware --- .../onnxruntime/core/framework/run_options.h | 4 +- .../core/session/onnxruntime_c_api.h | 1 + .../core/session/onnxruntime_cxx_api.h | 6 ++ onnxruntime/core/session/onnxruntime_c_api.cc | 61 ++++++++++++++++--- onnxruntime/lora/lora_adapters.h | 8 +-- onnxruntime/test/lora/lora_test.cc | 10 +-- 6 files changed, 72 insertions(+), 18 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index 
aa741af0f1643..ffe5c61f506c0 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -5,6 +5,8 @@ #include #include + +#include "core/common/inlined_containers_fwd.h" #include "core/session/onnxruntime_c_api.h" #include "core/framework/config_options.h" @@ -46,7 +48,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - std::vector active_adapters_; + onnxruntime::InlinedVector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 8cefb7ef7a3fb..d0e3507fe7949 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4697,6 +4697,7 @@ struct OrtApi { * The instance of the OrtRunOptions will then can be used to customize the OrtSession::Run() calls. * More than one OrtLoraAdapter can be set active at the same time. Lora Parameters that belong to difference * Lora adapters that will be active at the same time must not overlap. + * This setting does not affect RunWithBinding. * * \param[in] options OrtRunOptions instance * \param[in] adapter OrtLoraAdapter instance diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 468317099cd09..d22051ded78ef 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -779,6 +779,12 @@ struct RunOptions : detail::Base { */ RunOptions& UnsetTerminate(); + /** \brief Designates the argument as an active adapter for the session Run() calls. + * The setting does not affect RunWithBinding() calls. 
+ * + * Wraps OrtApi::RunOptionsSetLoraAdapterActive + * \param adapter The LoraAdapter to be used as the active adapter + */ RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); }; diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 96f2ee1e14ee1..be39632dc270e 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -39,6 +39,8 @@ #include "core/platform/ort_mutex.h" #include "core/common/string_helper.h" +#include "lora/lora_adapters.h" + #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" #include "core/providers/cuda/cuda_execution_provider_info.h" @@ -813,6 +815,37 @@ ORT_API_STATUS_IMPL(OrtApis::CreateSessionFromArray, _In_ const OrtEnv* env, _In API_IMPL_END } +namespace { +// Checks if there are active lora adapters and adjusts input spans. +void CheckAndAdjustForLora(const OrtRunOptions* run_options, + InlinedVector& input_names_with_lora, + InlinedVector input_with_lora, + gsl::span& input_names, + gsl::span& inputs) { + if (!run_options->active_adapters_.empty()) { + size_t total_lora_params = 0; + for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + total_lora_params += ad->GetParamNum(); + } + + input_names_with_lora.reserve(input_names.size() + total_lora_params); + input_with_lora.reserve(inputs.size() + total_lora_params); + std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); + std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); + + // XXX: Currently only on CPU. 
+ for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + ad->OutputLoadedAdaptersParameters(std::back_inserter(input_names_with_lora), + std::back_inserter(input_with_lora)); + } + + input_names = gsl::make_span(input_names_with_lora); + inputs = gsl::make_span(input_with_lora); + } +} + +} // namespace + ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, _In_reads_(input_len) const char* const* input_names, _In_reads_(input_len) const OrtValue* const* input, size_t input_len, @@ -821,19 +854,26 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); - gsl::span input_names_span(input_names, input_len); - gsl::span input_span(input, input_len); - gsl::span output_name_span(output_names, output_names_len); - gsl::span output_span(output, output_names_len); + auto input_names_span = gsl::make_span(input_names, input_len); + auto input_span = gsl::make_span(input, input_len); + auto output_name_span = gsl::make_span(output_names, output_names_len); + auto output_span = gsl::make_span(output, output_names_len); Status status; if (run_options) { + + InlinedVector input_names_with_lora; + InlinedVector input_with_lora; + + CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); + status = session->Run(*run_options, input_names_span, input_span, output_name_span, output_span); } else { + const RunOptions default_run_options; status = session->Run(default_run_options, input_names_span, @@ -854,10 +894,15 @@ ORT_API_STATUS_IMPL(OrtApis::RunAsync, _Inout_ OrtSession* sess, _In_opt_ const API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); - gsl::span input_names_span(input_names, input_len); - gsl::span input_span(input, input_len); - gsl::span output_name_span(output_names, output_names_len); - gsl::span 
output_span(output, output_names_len); + auto input_names_span = gsl::make_span(input_names, input_len); + auto input_span = gsl::make_span(input, input_len); + auto output_name_span = gsl::make_span(output_names, output_names_len); + auto output_span = gsl::make_span(output, output_names_len); + + InlinedVector input_names_with_lora; + InlinedVector input_with_lora; + + CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); return ToOrtStatus(session->RunAsync(run_options, input_names_span, diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 865fbe3f21cf2..fa9aeaba10232 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -86,20 +86,20 @@ class LoraAdapter { } /// - /// Outputs Lora Parameters, their names and values + /// Outputs Lora Parameters on CPU, their names and values /// into the supplied output iterators. /// /// /// /// output iterator that accepts const char* - /// output iterator that accepts OrtValue + /// output iterator that accepts const OrtValue* template - void OutputAdaptersParameters(NamesOutputIter names_out, + void OutputLoadedAdaptersParameters(NamesOutputIter names_out, TensorOutputIter tensor_out) const { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = param.ort_value_mapped_; + *tensor_out = ¶m.ort_value_mapped_; ++tensor_out; } } diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 6393cbc697030..c4cf617960280 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -86,21 +86,21 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_EQ(param_num, 2U); InlinedVector names; - InlinedVector ort_values; + InlinedVector ort_values; names.reserve(param_num); ort_values.reserve(param_num); - adapter.OutputAdaptersParameters(std::back_inserter(names), 
std::back_inserter(ort_values)); + adapter.OutputLoadedAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); ASSERT_EQ(param_num, names.size()); ASSERT_EQ(param_num, ort_values.size()); for (size_t i = 0; i < param_num; ++i) { const auto& name = names[i]; - const auto& ort_value = ort_values[i]; + const auto* ort_value = ort_values[i]; ASSERT_TRUE(name != nullptr); - ASSERT_TRUE(ort_value.IsTensor()); + ASSERT_TRUE(ort_value->IsTensor()); - const auto& tensor = ort_value.Get(); + const auto& tensor = ort_value->Get(); ASSERT_NE(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); const auto shape = tensor.Shape().GetDims(); From fd02453f9f7a7ded7f973fe4b5124822a65f26f0 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 16:37:07 -0700 Subject: [PATCH 16/84] Add format builder --- onnxruntime/lora/lora_format_utils.cc | 29 +++++++++++++++++++ onnxruntime/lora/lora_format_utils.h | 40 +++++++++++++++++++++++++++ onnxruntime/test/lora/lora_test.cc | 34 ++++++----------------- 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 378f2833904ca..9a4c1ce6f2415 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -124,6 +124,35 @@ OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const Allocato return result; } +void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type, + gsl::span shape, gsl::span data) { + flatbuffers::Offset fbs_param; + SaveLoraParameter(builder_, name, data_type, shape, data, fbs_param); + params_.push_back(fbs_param); +} + +std::vector AdapterFormatBuilder::Finish(int adapter_version, int model_version) { + FinishImpl(adapter_version, model_version); + + std::vector result; + result.reserve(builder_.GetSize()); + gsl::span buffer(builder_.GetBufferPointer(), builder_.GetSize()); + std::copy(buffer.begin(), 
buffer.end(), std::back_inserter(result)); + return result; +} + +gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int model_version) { + FinishImpl(adapter_version, model_version); + return gsl::make_span(builder_.GetBufferPointer(), builder_.GetSize()); +} + +void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { + auto fbs_params = builder_.CreateVector(params_); + auto fbs_adapter = lora::CreateAdapter(builder_, lora::kLoraFormatVersion, adapter_version, + model_version, fbs_params); + builder_.Finish(fbs_adapter, lora::AdapterIdentifier()); +} + } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 3c0bede4c5f2b..e7e341945f2ca 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -23,6 +23,46 @@ namespace onnxruntime { namespace lora { namespace utils { +/// +/// Helper class to serialize Lora adapter +/// +class AdapterFormatBuilder { + public: + AdapterFormatBuilder() = default; + + /// + /// Appends parameter tensor to the adapter builder + /// + /// parameter name + /// + /// + /// + void AddParameter(const std::string& name, lora::TensorDataType data_type, + gsl::span shape, gsl::span data); + + /// + /// Finishes serialization and returns a serialized byte vector + /// + /// + /// + /// + std::vector Finish(int adapter_version, int model_version); + + /// + /// Finishes serialization and returns a span to internal buffer. 
+ /// + /// + /// + /// + gsl::span FinishWithSpan(int adapter_version, int model_version); + + private: + void FinishImpl(int adapter_version, int model_version); + + flatbuffers::FlatBufferBuilder builder_; + std::vector> params_; +}; + /// /// /// diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index c4cf617960280..65a49865fed2d 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -150,31 +150,13 @@ struct GenerateTestParameters { std::iota(param_2.begin(), param_2.end(), T{32}); } - flatbuffers::FlatBufferBuilder builder; - std::vector> params; - params.reserve(2); - - flatbuffers::Offset fbs_param_1, fbs_param_2; - auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); - lora::utils::SaveLoraParameter(builder, "param_1", static_cast(data_type), param_shape, - byte_span, fbs_param_1); - params.push_back(fbs_param_1); - - byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); - lora::utils::SaveLoraParameter(builder, "param_2", static_cast(data_type), param_shape, - byte_span, fbs_param_2); - params.push_back(fbs_param_2); - - auto fbs_params = builder.CreateVector(params); - auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, kAdapterVersion, kModelVersion, - fbs_params); - builder.Finish(fbs_adapter, lora::AdapterIdentifier()); - - std::vector result; - result.reserve(builder.GetSize()); - gsl::span buffer(builder.GetBufferPointer(), builder.GetSize()); - std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); - return result; + lora::utils::AdapterFormatBuilder adapter_builder; + adapter_builder.AddParameter("param_1", static_cast(data_type), + param_shape, ReinterpretAsSpan(gsl::make_span(param_1))); + adapter_builder.AddParameter("param_2", static_cast(data_type), + param_shape, ReinterpretAsSpan(gsl::make_span(param_2))); + + return adapter_builder.Finish(kAdapterVersion, kModelVersion); } }; @@ -194,7 +176,7 @@ TEST(LoraAdapterTest, Load) 
{ // Test different data types const auto data_types = gsl::make_span(lora::EnumValuesTensorDataType()); for (size_t i = 1, size = data_types.size(); i < size; ++i) { - if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) + if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) continue; utils::MLTypeCallDispatcher Date: Mon, 9 Sep 2024 10:05:55 -0700 Subject: [PATCH 17/84] Start Python impl --- onnxruntime/python/onnxruntime_pybind_lora.cc | 104 ++++++++++++++++++ .../python/onnxruntime_pybind_mlvalue.cc | 6 +- onnxruntime/python/onnxruntime_pybind_state.h | 1 + 3 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 onnxruntime/python/onnxruntime_pybind_lora.cc diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc new file mode 100644 index 0000000000000..5339bdf257dca --- /dev/null +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "python/onnxruntime_pybind_exceptions.h" +#include "python/onnxruntime_pybind_mlvalue.h" +#include "python/onnxruntime_pybind_state_common.h" + +#define NO_IMPORT_ARRAY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API +#include "python/numpy_helper.h" + +#include "core/graph/onnx_protobuf.h" + +#include "core/framework/ort_value.h" +#include "core/framework/tensor.h" + +#include "lora/lora_format_utils.h" + +namespace onnxruntime { +namespace python { + +namespace py = pybind11; + +namespace { + +// Check if the numpy dtype descr property has any of the known types +// that is not supported natively by numpy arrays +std::optional GetDescrPropertyString(const py::dtype& arr_dtype) { + std::string custom_type; + try { + if (py::hasattr(arr_dtype, "descr")) { + auto descr = py::getattr(arr_dtype, "descr").cast(); + if (descr.size() > 0) { + auto item = descr[0].cast(); + if (item.size() > 0) { + custom_type = item[0].cast(); + } + } + } + } catch (const py::cast_error&) { + // Ignore the exception + PyErr_Clear(); + return {}; + } + return custom_type; +} +} // namespace + +void AddLoraMethods(pybind11::module& m) { + m.def( + "export_lora_parameters", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& lora_parameters) { + std::ofstream file(file_name, std::ios::binary); + if (file.fail()) { + ORT_THROW("Failed to open file:", file_name, " for writing."); + } + + lora::utils::AdapterFormatBuilder format_builder; + for (const auto& [n, arr] : lora_parameters) { + const std::string param_name = py::str(n); + py::array np_array = arr.cast(); + + py::dtype arr_dtype = np_array.dtype(); + + // This is the element type as supported by numpy, + // however, we can have bfloat16 and float8 types custome types defined. 
+ auto ml_element_type = NumpyTypeToOnnxRuntimeTensorType(arr_dtype.num()); + auto onnx_element_type = static_cast( + ml_element_type->AsPrimitiveDataType()->GetDataType()); + + if (!ONNX_NAMESPACE::TensorProto_DataType_IsValid(onnx_element_type)) { + ORT_THROW("Unsupported tensor ONNX element type: ", onnx_element_type); + } + + // Adjust for custom ONNX types + // see https://github.com/onnx/onnx/blob/main/onnx/_custom_element_types.py + switch (onnx_element_type) { + // Check if this really means BFloat16 as numpy custom types are conveyed + // by means of special annotations. + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { + auto custom_type = GetDescrPropertyString(arr_dtype); + if (custom_type.has_value()) { + // onnx_element_type = map string to type + } + break; + } + + // Check if this really means one of the float8 types + case ONNX_NAMESPACE::TensorProto_DataType_INT8: { + auto custom_type = GetDescrPropertyString(arr_dtype); + if (custom_type.has_value()) { + // onnx_element_type = map string to type + } + break; + } + default: + break; + }; + } + }, + "Save lora adapter parameters into a lora file format. 
"); +} + +} // namespace python +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index 8fdac257297c1..010039e2e8417 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -87,15 +87,13 @@ static TensorShape GetArrayShape(PyArrayObject* pyObject) { const int ndim = PyArray_NDIM(pyObject); const npy_intp* npy_dims = PyArray_DIMS(pyObject); auto span = gsl::make_span(npy_dims, ndim); - std::vector dims(span.begin(), span.end()); - TensorShape shape(std::move(dims)); + TensorShape shape(span); return shape; } TensorShape GetShape(const py::array& arr) { auto span = gsl::make_span(arr.shape(), arr.ndim()); - std::vector dims(span.begin(), span.end()); - TensorShape shape(std::move(dims)); + TensorShape shape(span); return shape; } diff --git a/onnxruntime/python/onnxruntime_pybind_state.h b/onnxruntime/python/onnxruntime_pybind_state.h index 47cde0d4cf193..fc9ef83d7a0d3 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.h +++ b/onnxruntime/python/onnxruntime_pybind_state.h @@ -9,6 +9,7 @@ namespace python { void addGlobalMethods(py::module& m, Environment& env); void addObjectMethods(py::module& m, Environment& env); void addOrtValueMethods(pybind11::module& m); +void AddLoraMethods(pybind11::module& m); } // namespace python } // namespace onnxruntime From a6624525ed1071d879e715fcd753247ae5748fc4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 28 Aug 2024 14:52:50 -0700 Subject: [PATCH 18/84] Add Lora Parameters schema and script --- onnxruntime/lora/lora_format/README.md | 36 ++ .../lora/lora_format/compile_schema.py | 53 +++ onnxruntime/lora/lora_format/lora_schema.fbs | 51 +++ .../lora/lora_format/lora_schema.fbs.h | 338 ++++++++++++++++++ 4 files changed, 478 insertions(+) create mode 100644 onnxruntime/lora/lora_format/README.md create mode 100644 
onnxruntime/lora/lora_format/compile_schema.py create mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs create mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs.h diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md new file mode 100644 index 0000000000000..ec39974464c1c --- /dev/null +++ b/onnxruntime/lora/lora_format/README.md @@ -0,0 +1,36 @@ +# Lora Parameters Flatbuffer Schemas +This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the +Lora Parameters file format. This file format is defined as means to deliver Lora parameters so it can read by ONNXRuntime C++ code. + +The format format is generally designed to house a single Lora adapter named Lora parameters. + +[ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library. + +Please do not directly modify the generated C++ header file for [ONNXRuntime Lora Parameter file format]((lora_schema.fbs.h)). + +Use flatc compiler for the purpose. + +e.g. + - Windows Debug build + - \build\Windows\Debug\_deps\flatbuffers-build\Debug\flatc.exe + - Linux Debug build + - /build/Linux/Debug/_deps/flatbuffers-build/flatc + +It is possible to use another flatc as well, e.g., from a separate installation. + +To update the flatbuffers schemas and generated files: +1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). +2. Run [compile_schema.py](./compile_schema.py) to generate the C++ and Python bindings. + + ``` + python onnxruntime/lora/lora_format/compile_schema.py --flatc + ``` +# Lora format version history +In [lora_format_version.h](../lora_format_version.h), see `IsLoraParameterslVersionSupported()` for the supported versions and +`kLoraParametersVersion` for the current version. + +## Version 1 +History begins. 
+ +Initial support for FlatBuffers that Lora Parameters support. This includes a definition of Tensor entity +so it can be saved in a tensor per file format. diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py new file mode 100644 index 0000000000000..bee53885a2005 --- /dev/null +++ b/onnxruntime/lora/lora_format/compile_schema.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import argparse +import pathlib +import subprocess + +SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + +def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): + # run flatc to generate C++ code + cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] + subprocess.run(cmd, check=True, cwd=SCRIPT_DIR) + + +def main(): + parser = argparse.ArgumentParser( + description="Generate language bindings for the ORT flatbuffers schema.", + usage="Provide the path to the flatbuffers flatc executable. " + "Script can be executed from anywhere but must be located in its original " + "directory in the ONNX Runtime enlistment.", + ) + + parser.add_argument( + "-f", + "--flatc", + required=True, + type=pathlib.Path, + help="Path to flatbuffers flatc executable. 
" + "Can be found in the build directory under _deps/flatbuffers-build//", + ) + + all_languages = ["cpp"] + parser.add_argument( + "-l", + "--language", + action="append", + dest="languages", + choices=all_languages, + help="Specify which language bindings to generate.", + ) + + args = parser.parse_args() + languages = args.languages if args.languages is not None else all_languages + flatc = args.flatc.resolve(strict=True) + schema_path = SCRIPT_DIR / "lora_schema.fbs" + + if "cpp" in languages: + generate_cpp(flatc, schema_path) + +if __name__ == "__main__": + main() diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs new file mode 100644 index 0000000000000..9079211ae80a6 --- /dev/null +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace onnxruntime.lora_format; + +// Tensor +enum TensorDataType : int32 { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, +} + +// For simplicity, we will have only have one data field +// - raw_data for all primitive types. +// We do not foresee strings as parameters. 
+table Parameter { + name:string; + + dims:[int64]; + data_type:TensorDataType; + + raw_data:[uint8]; +} + +table Adapter { + format_version:int; + adapter_version:int; + model_version:int; + parameters:[Parameter]; +} + +root_type Adapter; +file_identifier "GAIL"; diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h new file mode 100644 index 0000000000000..a70bb36a0aa68 --- /dev/null +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -0,0 +1,338 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ +#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ + +#include "flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && + FLATBUFFERS_VERSION_MINOR == 5 && + FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); + +namespace onnxruntime { +namespace lora_format { + +struct Parameter; +struct ParameterBuilder; + +struct Adapter; +struct AdapterBuilder; + +enum class TensorDataType : int32_t { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, + MIN = UNDEFINED, + MAX = FLOAT8E5M2FNUZ +}; + +inline const TensorDataType (&EnumValuesTensorDataType())[21] { + static const TensorDataType values[] = { + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + 
TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ + }; + return values; +} + +inline const char * const *EnumNamesTensorDataType() { + static const char * const names[22] = { + "UNDEFINED", + "FLOAT", + "UINT8", + "INT8", + "UINT16", + "INT16", + "INT32", + "INT64", + "STRING", + "BOOL", + "FLOAT16", + "DOUBLE", + "UINT32", + "UINT64", + "COMPLEX64", + "COMPLEX128", + "BFLOAT16", + "FLOAT8E4M3FN", + "FLOAT8E4M3FNUZ", + "FLOAT8E5M2", + "FLOAT8E5M2FNUZ", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorDataType(TensorDataType e) { + if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorDataType()[index]; +} + +struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ParameterBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_DIMS = 6, + VT_DATA_TYPE = 8, + VT_RAW_DATA = 10 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + const ::flatbuffers::Vector *dims() const { + return GetPointer *>(VT_DIMS); + } + onnxruntime::lora_format::TensorDataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); + } + const ::flatbuffers::Vector *raw_data() const { + return GetPointer *>(VT_RAW_DATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_DIMS) && + verifier.VerifyVector(dims()) && + VerifyField(verifier, VT_DATA_TYPE, 4) && + VerifyOffset(verifier, VT_RAW_DATA) && + 
verifier.VerifyVector(raw_data()) && + verifier.EndTable(); + } +}; + +struct ParameterBuilder { + typedef Parameter Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Parameter::VT_NAME, name); + } + void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { + fbb_.AddOffset(Parameter::VT_DIMS, dims); + } + void add_data_type(onnxruntime::lora_format::TensorDataType data_type) { + fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); + } + void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { + fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); + } + explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateParameter( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, + onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { + ParameterBuilder builder_(_fbb); + builder_.add_raw_data(raw_data); + builder_.add_data_type(data_type); + builder_.add_dims(dims); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateParameterDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + const std::vector *dims = nullptr, + onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + const std::vector *raw_data = nullptr) { + auto name__ = name ? _fbb.CreateString(name) : 0; + auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; + auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; + return onnxruntime::lora_format::CreateParameter( + _fbb, + name__, + dims__, + data_type, + raw_data__); +} + +struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AdapterBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FORMAT_VERSION = 4, + VT_ADAPTER_VERSION = 6, + VT_MODEL_VERSION = 8, + VT_PARAMETERS = 10 + }; + int32_t format_version() const { + return GetField(VT_FORMAT_VERSION, 0); + } + int32_t adapter_version() const { + return GetField(VT_ADAPTER_VERSION, 0); + } + int32_t model_version() const { + return GetField(VT_MODEL_VERSION, 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { + return GetPointer> *>(VT_PARAMETERS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FORMAT_VERSION, 4) && + VerifyField(verifier, VT_ADAPTER_VERSION, 4) && + VerifyField(verifier, VT_MODEL_VERSION, 4) && + VerifyOffset(verifier, VT_PARAMETERS) && + verifier.VerifyVector(parameters()) && + verifier.VerifyVectorOfTables(parameters()) && + verifier.EndTable(); + } +}; + +struct AdapterBuilder { + typedef Adapter Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_format_version(int32_t format_version) { + fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); + } + void add_adapter_version(int32_t adapter_version) { + fbb_.AddElement(Adapter::VT_ADAPTER_VERSION, adapter_version, 0); + } + void add_model_version(int32_t model_version) { + fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); + } + void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { + fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); + } + explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + 
const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAdapter( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t format_version = 0, + int32_t adapter_version = 0, + int32_t model_version = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { + AdapterBuilder builder_(_fbb); + builder_.add_parameters(parameters); + builder_.add_model_version(model_version); + builder_.add_adapter_version(adapter_version); + builder_.add_format_version(format_version); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateAdapterDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t format_version = 0, + int32_t adapter_version = 0, + int32_t model_version = 0, + const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; + return onnxruntime::lora_format::CreateAdapter( + _fbb, + format_version, + adapter_version, + model_version, + parameters__); +} + +inline const onnxruntime::lora_format::Adapter *GetAdapter(const void *buf) { + return ::flatbuffers::GetRoot(buf); +} + +inline const onnxruntime::lora_format::Adapter *GetSizePrefixedAdapter(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline const char *AdapterIdentifier() { + return "GAIL"; +} + +inline bool AdapterBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, AdapterIdentifier()); +} + +inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, AdapterIdentifier(), true); +} + +inline bool VerifyAdapterBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(AdapterIdentifier()); +} + +inline bool VerifySizePrefixedAdapterBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); 
+} + +inline void FinishAdapterBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.Finish(root, AdapterIdentifier()); +} + +inline void FinishSizePrefixedAdapterBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, AdapterIdentifier()); +} + +} // namespace lora_format +} // namespace onnxruntime + +#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ From 889d7caeeca0795ceafe74f3f5d0aa2e87a4495d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 29 Aug 2024 17:22:36 -0700 Subject: [PATCH 19/84] Add onnxruntime_lora static lib --- cmake/CMakeLists.txt | 2 +- cmake/onnxruntime_lora.cmake | 30 ++++ cmake/onnxruntime_python.cmake | 1 + cmake/onnxruntime_unittests.cmake | 12 +- cmake/onnxruntime_webassembly.cmake | 2 + cmake/winml_unittests.cmake | 2 +- onnxruntime/lora/lora_adapters.cc | 35 +++++ onnxruntime/lora/lora_adapters.h | 74 +++++++++ onnxruntime/lora/lora_format/lora_schema.fbs | 2 +- .../lora/lora_format/lora_schema.fbs.h | 144 +++++++++--------- onnxruntime/lora/lora_format_utils.cc | 111 ++++++++++++++ onnxruntime/lora/lora_format_utils.h | 87 +++++++++++ onnxruntime/lora/lora_format_version.h | 33 ++++ 13 files changed, 455 insertions(+), 80 deletions(-) create mode 100644 cmake/onnxruntime_lora.cmake create mode 100644 onnxruntime/lora/lora_adapters.cc create mode 100644 onnxruntime/lora/lora_adapters.h create mode 100644 onnxruntime/lora/lora_format_utils.cc create mode 100644 onnxruntime/lora/lora_format_utils.h create mode 100644 onnxruntime/lora/lora_format_version.h diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2c8fb4824d94a..6116c8efea490 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1693,7 +1693,7 @@ endif() #Now the 'onnxruntime_EXTERNAL_LIBRARIES' variable should be sealed. It will be used in onnxruntime.cmake which will be included in the next. #The order of the following targets matters. 
Right depends on left. If target A appears before target B. Then A.cmake can not use variables defined in B.cmake. -set(ONNXRUNTIME_CMAKE_FILES onnxruntime_flatbuffers onnxruntime_common onnxruntime_mlas onnxruntime_graph onnxruntime_framework onnxruntime_util onnxruntime_providers onnxruntime_optimizer onnxruntime_session ${ONNXRUNTIME_EAGER_CMAKE_FILE_NAME}) +set(ONNXRUNTIME_CMAKE_FILES onnxruntime_flatbuffers onnxruntime_common onnxruntime_mlas onnxruntime_graph onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_providers onnxruntime_optimizer onnxruntime_session ${ONNXRUNTIME_EAGER_CMAKE_FILE_NAME}) if (onnxruntime_USE_WINML) # WINML uses and depends on the shared lib. Note: You can build WINML without DML and you will get a diff --git a/cmake/onnxruntime_lora.cmake b/cmake/onnxruntime_lora.cmake new file mode 100644 index 0000000000000..3f99e230031ad --- /dev/null +++ b/cmake/onnxruntime_lora.cmake @@ -0,0 +1,30 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +file(GLOB onnxruntime_lora_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/lora_format/*.h" + "${ONNXRUNTIME_ROOT}/lora/*.h" + "${ONNXRUNTIME_ROOT}/lora/*.cc" + ) + +source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_lora_srcs}) + +onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs}) +onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers ${GSL_TARGET}) +target_link_libraries(onnxruntime_lora onnxruntime_framework) + +if(onnxruntime_ENABLE_INSTRUMENT) + target_compile_definitions(onnxruntime_lora PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) +endif() + +target_include_directories(onnxruntime_lora PRIVATE ${ONNXRUNTIME_ROOT}) +add_dependencies(onnxruntime_lora ${onnxruntime_EXTERNAL_DEPENDENCIES}) +set_target_properties(onnxruntime_lora PROPERTIES FOLDER "ONNXRuntime") + +if (NOT onnxruntime_BUILD_SHARED_LIB) + install(TARGETS onnxruntime_lora + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 574cffbb716b3..90afcd4d83e18 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -186,6 +186,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE onnxruntime_providers onnxruntime_util ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 58dd08f15f4e2..533a15cc641bd 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -612,6 +612,7 @@ set(ONNXRUNTIME_TEST_LIBS onnxruntime_providers onnxruntime_util ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph @@ -1232,7 +1233,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) #onnxruntime_common is kind of ok 
because it is thin, tiny and totally stateless. set(onnxruntime_perf_test_libs onnx_test_runner_common onnxruntime_test_utils onnxruntime_common - onnxruntime onnxruntime_flatbuffers onnx_test_data_proto + onnxruntime onnxruntime_lora onnxruntime_flatbuffers onnx_test_data_proto ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) if(NOT WIN32) @@ -1245,7 +1246,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) list(APPEND onnxruntime_perf_test_libs ${android_shared_libs}) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) + list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) endif() target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads) if(WIN32) @@ -1329,7 +1330,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2) + list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_lora onnxruntime_flatbuffers iconv re2) endif() AddTest(DYN @@ -1512,6 +1513,7 @@ endif() 
onnxruntime_optimizer onnxruntime_providers onnxruntime_util + onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_graph @@ -1632,7 +1634,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") list(APPEND onnxruntime_customopregistration_test_LIBS ${TENSORRT_LIBRARY_INFER}) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_customopregistration_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) + list(APPEND onnxruntime_customopregistration_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) endif() AddTest(DYN TARGET onnxruntime_customopregistration_test @@ -1751,7 +1753,7 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" set(onnxruntime_logging_apis_test_LIBS onnxruntime_common onnxruntime_test_utils) if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_logging_apis_test_LIBS onnxruntime_session onnxruntime_util onnxruntime_framework onnxruntime_common onnxruntime_graph onnxruntime_providers onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) + list(APPEND onnxruntime_logging_apis_test_LIBS onnxruntime_session onnxruntime_util onnxruntime_lora onnxruntime_framework onnxruntime_common onnxruntime_graph onnxruntime_providers onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 libprotobuf-lite onnx_proto nsync_cpp) endif() if(NOT WIN32) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 0686b66876d9f..3a1576065205f 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ 
-102,6 +102,7 @@ if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB) onnx onnx_proto onnxruntime_common + onnxruntime_lora onnxruntime_flatbuffers onnxruntime_framework onnxruntime_graph @@ -179,6 +180,7 @@ else() onnx onnx_proto onnxruntime_common + onnxruntime_lora onnxruntime_flatbuffers onnxruntime_framework onnxruntime_graph diff --git a/cmake/winml_unittests.cmake b/cmake/winml_unittests.cmake index b655e60a8aec9..68acac584f2c0 100644 --- a/cmake/winml_unittests.cmake +++ b/cmake/winml_unittests.cmake @@ -166,7 +166,7 @@ function (get_winml_test_model_src "${winml_test_src_path}/model/*.cpp") set(${output_winml_test_model_src} ${winml_test_model_src} PARENT_SCOPE) set(${winml_test_model_libs} onnx_test_data_proto onnx_test_runner_common onnxruntime_common onnxruntime_mlas - onnxruntime_graph onnxruntime_test_utils onnxruntime_framework onnxruntime_util onnxruntime_flatbuffers PARENT_SCOPE) + onnxruntime_graph onnxruntime_test_utils onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_flatbuffers PARENT_SCOPE) endfunction() file(GLOB winml_test_common_src CONFIGURE_DEPENDS diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc new file mode 100644 index 0000000000000..1425690b41660 --- /dev/null +++ b/onnxruntime/lora/lora_adapters.cc @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "lora_adapters.h" +#include "lora_format_utils.h" + +#include +#include + +namespace onnxruntime { +namespace lora { +namespace details { + +LoraParam::LoraParam(std::string name, OrtValue ort_value) + : name_(std::move(name)), ort_value_(std::move(ort_value)) {} + +void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { + auto buffer = utils::LoadLoraAdapterBytes(file_path); + adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); + buffer_.emplace(std::move(buffer)); +} + +size_t BinaryFormatHolder::GetSize() const { + if (std::holds_alternative(buffer_)) { + return std::get<0>(buffer_).file_size_; + } else if (std::holds_alternative(buffer_)) { + return std::get<1>(buffer_).buffer_.size(); + } + ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); +} + +} // namespace details + +} // namespace lora +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h new file mode 100644 index 0000000000000..5966e14248fb0 --- /dev/null +++ b/onnxruntime/lora/lora_adapters.h @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/platform/env.h" +#include "core/framework/ort_value.h" + +#include +#include +#include +#include + +namespace onnxruntime { +namespace lora { + +struct Adapter; + +namespace details { +// This class takes hold of the serialized parameters that +// are either loaded from disk or mapped from disk (coming in the future) +// This data is always in host memory. +class BinaryFormatHolder { + public: + BinaryFormatHolder() = default; + BinaryFormatHolder(const BinaryFormatHolder&) = delete; + BinaryFormatHolder& operator=(const BinaryFormatHolder&) = delete; + + /// + /// Load parameters from a flatbuffer file. 
+ /// + /// file name that can be opened + void Load(const std::filesystem::path& file_path); + + void MemoryMapFile(const std::string& file_name); + + // Get Flatbuffer object pointer + const Adapter* GetParameters() const noexcept { return adapter_; } + + // Get the size of the buffer + size_t GetSize() const; + + private: + struct BufferHolder { + explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} + std::vector buffer_; + }; + + struct MemMapHolder { + MemMapHolder(Env::MappedMemoryPtr mapped_memory, size_t file_size) + : mapped_memory_(std::move(mapped_memory)), file_size_(file_size) {} + Env::MappedMemoryPtr mapped_memory_; + size_t file_size_; + }; + + std::variant buffer_; + const Adapter* adapter_; +}; + +/// +/// Represents a named lora parameter (tensor) +/// +struct LoraParam { + LoraParam() = default; + LoraParam(std::string name, OrtValue parameter); + + std::string name_; + OrtValue ort_value_; +}; + +} // namespace details + +} // namespace lora +} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs index 9079211ae80a6..073fe0945517d 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-namespace onnxruntime.lora_format; +namespace onnxruntime.lora; // Tensor enum TensorDataType : int32 { diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h index a70bb36a0aa68..72b27dd355814 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -1,8 +1,8 @@ // automatically generated by the FlatBuffers compiler, do not modify -#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ -#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ +#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ +#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ #include "flatbuffers/flatbuffers.h" @@ -14,7 +14,7 @@ static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && "Non-compatible flatbuffers version included"); namespace onnxruntime { -namespace lora_format { +namespace lora { struct Parameter; struct ParameterBuilder; @@ -22,55 +22,55 @@ struct ParameterBuilder; struct Adapter; struct AdapterBuilder; -enum class TensorDataType : int32_t { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, - MIN = UNDEFINED, - MAX = FLOAT8E5M2FNUZ +enum TensorDataType : int32_t { + TensorDataType_UNDEFINED = 0, + TensorDataType_FLOAT = 1, + TensorDataType_UINT8 = 2, + TensorDataType_INT8 = 3, + TensorDataType_UINT16 = 4, + TensorDataType_INT16 = 5, + TensorDataType_INT32 = 6, + TensorDataType_INT64 = 7, + TensorDataType_STRING = 8, + TensorDataType_BOOL = 9, + TensorDataType_FLOAT16 = 10, + TensorDataType_DOUBLE = 11, + TensorDataType_UINT32 = 12, + TensorDataType_UINT64 = 13, + TensorDataType_COMPLEX64 = 14, + TensorDataType_COMPLEX128 = 15, + 
TensorDataType_BFLOAT16 = 16, + TensorDataType_FLOAT8E4M3FN = 17, + TensorDataType_FLOAT8E4M3FNUZ = 18, + TensorDataType_FLOAT8E5M2 = 19, + TensorDataType_FLOAT8E5M2FNUZ = 20, + TensorDataType_MIN = TensorDataType_UNDEFINED, + TensorDataType_MAX = TensorDataType_FLOAT8E5M2FNUZ }; inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ + TensorDataType_UNDEFINED, + TensorDataType_FLOAT, + TensorDataType_UINT8, + TensorDataType_INT8, + TensorDataType_UINT16, + TensorDataType_INT16, + TensorDataType_INT32, + TensorDataType_INT64, + TensorDataType_STRING, + TensorDataType_BOOL, + TensorDataType_FLOAT16, + TensorDataType_DOUBLE, + TensorDataType_UINT32, + TensorDataType_UINT64, + TensorDataType_COMPLEX64, + TensorDataType_COMPLEX128, + TensorDataType_BFLOAT16, + TensorDataType_FLOAT8E4M3FN, + TensorDataType_FLOAT8E4M3FNUZ, + TensorDataType_FLOAT8E5M2, + TensorDataType_FLOAT8E5M2FNUZ }; return values; } @@ -104,7 +104,7 @@ inline const char * const *EnumNamesTensorDataType() { } inline const char *EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; + if (::flatbuffers::IsOutRange(e, TensorDataType_UNDEFINED, TensorDataType_FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; } @@ -123,8 +123,8 @@ struct 
Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { const ::flatbuffers::Vector *dims() const { return GetPointer *>(VT_DIMS); } - onnxruntime::lora_format::TensorDataType data_type() const { - return static_cast(GetField(VT_DATA_TYPE, 0)); + onnxruntime::lora::TensorDataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); } const ::flatbuffers::Vector *raw_data() const { return GetPointer *>(VT_RAW_DATA); @@ -152,7 +152,7 @@ struct ParameterBuilder { void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { fbb_.AddOffset(Parameter::VT_DIMS, dims); } - void add_data_type(onnxruntime::lora_format::TensorDataType data_type) { + void add_data_type(onnxruntime::lora::TensorDataType data_type) { fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); } void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { @@ -173,7 +173,7 @@ inline ::flatbuffers::Offset CreateParameter( ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { ParameterBuilder builder_(_fbb); builder_.add_raw_data(raw_data); @@ -187,12 +187,12 @@ inline ::flatbuffers::Offset CreateParameterDirect( ::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, const std::vector *dims = nullptr, - onnxruntime::lora_format::TensorDataType data_type = onnxruntime::lora_format::TensorDataType::UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, const std::vector *raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; - return onnxruntime::lora_format::CreateParameter( + return onnxruntime::lora::CreateParameter( _fbb, name__, dims__, @@ -217,8 +217,8 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { int32_t model_version() const { return GetField(VT_MODEL_VERSION, 0); } - const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { - return GetPointer> *>(VT_PARAMETERS); + const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { + return GetPointer> *>(VT_PARAMETERS); } bool Verify(::flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -245,7 +245,7 @@ struct AdapterBuilder { void add_model_version(int32_t model_version) { fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); } - void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { + void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); } explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) @@ -264,7 +264,7 @@ inline ::flatbuffers::Offset CreateAdapter( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { AdapterBuilder builder_(_fbb); builder_.add_parameters(parameters); builder_.add_model_version(model_version); @@ -278,9 +278,9 @@ inline ::flatbuffers::Offset CreateAdapterDirect( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - const std::vector<::flatbuffers::Offset> *parameters = nullptr) { - auto parameters__ = parameters ? 
_fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; - return onnxruntime::lora_format::CreateAdapter( + const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; + return onnxruntime::lora::CreateAdapter( _fbb, format_version, adapter_version, @@ -288,12 +288,12 @@ inline ::flatbuffers::Offset CreateAdapterDirect( parameters__); } -inline const onnxruntime::lora_format::Adapter *GetAdapter(const void *buf) { - return ::flatbuffers::GetRoot(buf); +inline const onnxruntime::lora::Adapter *GetAdapter(const void *buf) { + return ::flatbuffers::GetRoot(buf); } -inline const onnxruntime::lora_format::Adapter *GetSizePrefixedAdapter(const void *buf) { - return ::flatbuffers::GetSizePrefixedRoot(buf); +inline const onnxruntime::lora::Adapter *GetSizePrefixedAdapter(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); } inline const char *AdapterIdentifier() { @@ -312,27 +312,27 @@ inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { inline bool VerifyAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(AdapterIdentifier()); + return verifier.VerifyBuffer(AdapterIdentifier()); } inline bool VerifySizePrefixedAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); + return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); } inline void FinishAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.Finish(root, AdapterIdentifier()); } inline void FinishSizePrefixedAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.FinishSizePrefixed(root, AdapterIdentifier()); } -} // namespace lora_format +} // namespace lora } // namespace onnxruntime -#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_FORMAT_H_ 
+#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc new file mode 100644 index 0000000000000..1e2ce1d58d0fe --- /dev/null +++ b/onnxruntime/lora/lora_format_utils.cc @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "lora_format_utils.h" +#include "lora_format_version.h" + +#include "core/common/common.h" +#include "core/common/span_utils.h" + +#include + +namespace onnxruntime { +namespace lora { +namespace utils { + +bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) { + return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory + AdapterBufferHasIdentifier(bytes); +} + +flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, + bool has_string, const std::string& src) { + if (has_string) return builder.CreateString(src); + + // If the string does not exist, return 0 (the string does not exist in flatbuffer) + return 0; +} + +void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) { + if (fbs_string) { + dst = fbs_string->str(); + } +} + +std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path) { + Env& env = Env::Default(); + + size_t file_size = 0; + ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); + + std::vector result; + result.resize(file_size); + + // The API accepts char span, so we need to reinterpret the uint8_t span as char span + auto dest_span = ReinterpretAsSpan(AsSpan(result)); + ORT_THROW_IF_ERROR(env.ReadFileIntoBuffer(file_path.c_str(), 0, file_size, dest_span)); + + return result; +} + +std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path) { + Env& env = Env::Default(); + + size_t file_size = 0; + ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); + + Env::MappedMemoryPtr 
result; + ORT_THROW_IF_ERROR(env.MapFileIntoMemory(file_path.c_str(), 0, file_size, result)); + + return {std::move(result), file_size}; +} + +const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { + if (!IsLoraFormatModelBytes(bytes.data(), bytes.size())) { + ORT_THROW("The buffer does not appear to be a valid lora parameter format"); + } + + flatbuffers::Verifier verifier(bytes.data(), bytes.size()); + if (!VerifyAdapterBuffer(verifier)) { + ORT_THROW("The buffer fails lora adapter format verification"); + } + + auto* adapter = GetAdapter(bytes.data()); + if (!IsLoraFormatVersionSupported(adapter->format_version())) { + ORT_THROW("Unsupported lora format version"); + } + + return adapter; +} + +void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, + TensorDataType data_type, gsl::span shape, + gsl::span data, + flatbuffers::Offset& fbs_tensor) { + auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size()); + auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size()); + auto data_vec = flat_builder.CreateVector(data.data(), data.size()); + + fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); +} + +// std::pair CreateOrtValueOverFlatBufferLoraParameter( +// const Parameter& tensor) { +// std::string name; +// LoadStringFromLoraFormat(name, tensor.name()); + +// const auto data_type = tensor.data_type(); + +// gsl::span shape_span(tensor.dims()->data(), tensor.dims()->size()); + +// auto mem_info = OrtMemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); +// auto ort_value = +// OrtValue::CreateTensor(*mem_info, const_cast(tensor.raw_data()->data()), +// static_cast(tensor.raw_data()->size()), shape_span, +// static_cast(data_type)); +// return std::make_pair(std::move(name), std::move(ort_value)); +// } + +} // namespace utils +} // namespace lora +} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h 
b/onnxruntime/lora/lora_format_utils.h new file mode 100644 index 0000000000000..508eb38ffb27c --- /dev/null +++ b/onnxruntime/lora/lora_format_utils.h @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/common/flatbuffers.h" +#include "core/platform/env.h" + +#include +#include + +#include "lora_format/lora_schema.fbs.h" + +#include +#include +#include +#include + +namespace onnxruntime { +namespace lora { +namespace utils { + +/// +/// +/// +/// +/// +/// +bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes); + +// Will only create string in flatbuffers when has_string is true +flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, + bool has_string, const std::string& src); + +void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string); + +/// +/// The function loads the lora adapter bytes from the file system +/// +/// file path +/// bytes in a vector +/// If the path can not be found +std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path); + +/// +/// This function memory maps the adapter file in memory +/// +/// +/// +std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); + +/// +/// Validates underlying format and the format version +/// +/// +/// Adapter ptr +const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes); + +/// +/// Serializes tensor data into flatbuffer +/// +/// +/// parameter name +/// doc, optional +/// +/// +/// +/// output offset +void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, + lora::TensorDataType data_type, + gsl::span shape, gsl::span data, + flatbuffers::Offset& fbs_tensor); + +/// +/// Create an OrtValue on top of the flatbuffer tensor +/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer +/// structures. 
+/// +/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying. +/// +/// +/// +// std::pair CreateOrtValueOverFlatBufferLoraParameter( +// const Generators::lora_parameters::Param& tensor); +} // namespace utils +} // namespace lora +} // namespace Generators diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h new file mode 100644 index 0000000000000..9c90a86b16382 --- /dev/null +++ b/onnxruntime/lora/lora_format_version.h @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include + +namespace onnxruntime { +namespace lora { + +// The current model versions for saving lora parameters in flatbuffers +// Once this version is updated, the kSupportedLoraFormatVersions in IsGenAiLoraFormatModelBytes +// below will also need to be updated. +// See src/flatbuffers/schema/README.md for more details on versioning. +// Version 1 - history begins +constexpr const int kLoraFormatVersion = 1; + +// Check if the given lora format version is supported in this build +inline bool IsLoraFormatVersionSupported(const int lora_format_version) { + // The lora format versions we will support in this build + // This may contain more versions than the kLoraFormatVersion, based on the compatibilities + static constexpr std::array kSupportedLoraFormatVersions{ + kLoraFormatVersion, + }; + + const auto it = + std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version); + return it != kSupportedLoraFormatVersions.cend(); +} + +} // namespace lora +} // namespace onnxruntime From ee3fcbc3b5260871b2d8957ec4fe1d9988f99b9a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 30 Aug 2024 15:14:29 -0700 Subject: [PATCH 20/84] Define and expose C API stubs --- cmake/onnxruntime_session.cmake | 2 +- .../onnxruntime/core/framework/run_options.h | 6 ++++ 
.../core/session/onnxruntime_c_api.h | 32 +++++++++++++++++++ onnxruntime/core/framework/config_options.h | 2 +- onnxruntime/core/framework/run_options.cc | 19 +++++++++++ onnxruntime/core/session/onnxruntime_c_api.cc | 9 ++++-- onnxruntime/core/session/ort_apis.h | 7 ++++ onnxruntime/lora/lora_adapters.cc | 9 ++++++ onnxruntime/lora/lora_adapters.h | 12 +++++-- onnxruntime/lora/lora_format_utils.h | 2 +- 10 files changed, 93 insertions(+), 7 deletions(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index b51c875951135..2325ff82dedc5 100644 --- a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -30,7 +30,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) -onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) +onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_lora onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index 789c3b13f2c3e..ed506ed815dec 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -8,6 +8,10 @@ #include "core/session/onnxruntime_c_api.h" #include "core/framework/config_options.h" +namespace onnxruntime { +struct OrtLoraAdapter; +} // namespace onnxruntime + /** * Configuration information for a Run call. 
*/ @@ -40,6 +44,8 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; + std::vector active_adapters_; + OrtRunOptions() = default; ~OrtRunOptions() = default; }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index a4ec66761c4ba..fb6d461645253 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -304,6 +304,7 @@ ORT_RUNTIME_CLASS(Op); ORT_RUNTIME_CLASS(OpAttr); ORT_RUNTIME_CLASS(Logger); ORT_RUNTIME_CLASS(ShapeInferContext); +ORT_RUNTIME_CLASS(LoraAdapter); #ifdef _WIN32 typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr; @@ -4670,6 +4671,37 @@ struct OrtApi { _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array, _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths, size_t num_external_initializer_files); + + /** \brief Create an OrtLoraAdapter + * + * The function attempts to locate file specified by adapter_file_path, read it and create an OrtLoraAdapter + * instance. The adapter_file_path should be a valid absolute path to a file that contains a valid Lora Adapter + * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless + * the platform does not support memory mapping, in which case the file will be read into memory. + * + * \param[in] adapter_file_path Absolute file path to the adapter file. + * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with + * OrtApi::ReleaseLoraAdapter. 
+   */
+  ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out);
+
+  /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter
+   */
+  ORT_CLASS_RELEASE(LoraAdapter);
+
+
+  /** \brief Set the active Lora Adapter for the run options
+   *
+   * The function sets the active Lora Adapter for the run options. The Lora Adapter must be created with
+   * OrtApi::CreateLoraAdapter. The Lora Adapter will be used by the session to run the model.
+   * The instance of the OrtRunOptions can then be used to customize the OrtSession::Run() calls.
+   * More than one OrtLoraAdapter can be set active at the same time. Lora Parameters that belong to different
+   * Lora adapters that will be active at the same time must not overlap.
+   *
+   * \param[in] options OrtRunOptions instance
+   * \param[in] adapter OrtLoraAdapter instance
+   */
+  ORT_API2_STATUS(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
};

/*
diff --git a/onnxruntime/core/framework/config_options.h b/onnxruntime/core/framework/config_options.h
index 7b7c226819e79..efdfdb45abbaa 100644
--- a/onnxruntime/core/framework/config_options.h
+++ b/onnxruntime/core/framework/config_options.h
@@ -19,7 +19,7 @@ struct ConfigOptions {

   // Gets the config string associated with the given config_key.
   // If not found, an empty optional is returned.
-  optional GetConfigEntry(const std::string& config_key) const noexcept;
+  std::optional GetConfigEntry(const std::string& config_key) const noexcept;

   // Check if this instance of ConfigOptions has a config using the given config_key.
   // Returns true if found and copies the value into config_value.
diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 95c111009c791..65f36f07c4847 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -2,12 +2,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "core/framework/run_options.h" +#include "lora/lora_adapters.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" + #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(disable : 26409) #endif + ORT_API_STATUS_IMPL(OrtApis::CreateRunOptions, _Outptr_ OrtRunOptions** out) { API_IMPL_BEGIN *out = new OrtRunOptions(); @@ -60,3 +63,19 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, _In_z_ const char* config_key, _In_z_ const char* config_value) { return onnxruntime::ToOrtStatus(options->config_options.AddConfigEntry(config_key, config_value)); } + +ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions*, const _In_ OrtLoraAdapter*) { + // Need cast to the real type + // options->active_adapters_.push_back(adapter); + return nullptr; +} + +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T*, _Outptr_ OrtLoraAdapter**) { + //auto adapter = new onnxruntime::OrtLoraAdapter(adapter_file_path); + // *out = adapter.release(); + return nullptr; +} + +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*) { + // delete reinterpret_cast(adapter); +} diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 1a5484ddc0055..96f2ee1e14ee1 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2730,6 +2730,9 @@ static constexpr OrtApi ort_api_1_to_20 = { &OrtApis::KernelInfoGetAllocator, &OrtApis::AddExternalInitializersFromFilesInMemory, // 
End of Version 18 - DO NOT MODIFY ABOVE (see above text for more information) + &OrtApis::CreateLoraAdapter, + &OrtApis::ReleaseLoraAdapter, + &OrtApis::RunOptionsSetActiveLoraAdapter, }; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. @@ -2786,7 +2789,7 @@ ORT_API(const char*, OrtApis::GetVersionString) { return ORT_VERSION; } -ORT_API(const char*, OrtApis::GetBuildInfoString) { +const char* _stdcall OrtApis::GetBuildInfoString() noexcept { return ORT_BUILD_INFO; } @@ -2799,6 +2802,8 @@ ORT_API(void, OrtApis::ReleaseEnv, OrtEnv* value) { } DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Value, OrtValue) -DEFINE_RELEASE_ORT_OBJECT_FUNCTION(RunOptions, OrtRunOptions) +void _stdcall OrtApis::ReleaseRunOptions(OrtRunOptions* value) noexcept { + delete reinterpret_cast(value); +} DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(ModelMetadata, ::onnxruntime::ModelMetadata) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index fcae173e6c162..8564985beb9f7 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -118,6 +118,8 @@ ORT_API_STATUS_IMPL(RunOptionsGetRunTag, _In_ const OrtRunOptions*, _Out_ const ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); +ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); + ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type, @@ -523,4 +525,9 @@ ORT_API_STATUS_IMPL(SessionOptionsAppendExecutionProvider_VitisAI, _In_ OrtSessi ORT_API_STATUS_IMPL(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* context, _In_ const OrtMemoryInfo* mem_info, _In_ size_t count_or_bytes, 
_Outptr_ void** out); ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); + +ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); +ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); +ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); + } // namespace OrtApis diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 1425690b41660..e10a577076547 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,12 +14,21 @@ namespace details { LoraParam::LoraParam(std::string name, OrtValue ort_value) : name_(std::move(name)), ort_value_(std::move(ort_value)) {} +BinaryFormatHolder::~BinaryFormatHolder() = default; + void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); } +void BinaryFormatHolder::MemoryMap(const std::filesystem::path& file_path) { + auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); + auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); + adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); + buffer_.emplace(std::move(mapped_memory), file_size); +} + size_t BinaryFormatHolder::GetSize() const { if (std::holds_alternative(buffer_)) { return std::get<0>(buffer_).file_size_; diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 5966e14248fb0..541ae59fbe293 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,14 +25,22 @@ class BinaryFormatHolder { BinaryFormatHolder() = default; BinaryFormatHolder(const BinaryFormatHolder&) = delete; BinaryFormatHolder& operator=(const 
BinaryFormatHolder&) = delete; + ~BinaryFormatHolder(); + + BinaryFormatHolder(BinaryFormatHolder&&) = default; + BinaryFormatHolder& operator=(BinaryFormatHolder&&) = default; /// - /// Load parameters from a flatbuffer file. + /// Load parameters from an adapter file and validates its format. /// /// file name that can be opened void Load(const std::filesystem::path& file_path); - void MemoryMapFile(const std::string& file_name); + /// + /// Memory maps adapter file into memory and validates its format. + /// + /// + void MemoryMap(const std::filesystem::path& file_path); // Get Flatbuffer object pointer const Adapter* GetParameters() const noexcept { return adapter_; } diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 508eb38ffb27c..75b499eb0bef9 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -46,7 +46,7 @@ std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path /// This function memory maps the adapter file in memory /// /// -/// +/// memory handle and file size in a tuple std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); /// From 4410f8507a05d0dd0e418eafd1ec5933580b6432 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 30 Aug 2024 16:45:05 -0700 Subject: [PATCH 21/84] Add loading --- .../core/session/onnxruntime_cxx_api.h | 12 ++++++ .../core/session/onnxruntime_cxx_inline.h | 8 ++++ onnxruntime/lora/lora_adapters.cc | 8 ++++ onnxruntime/lora/lora_adapters.h | 43 ++++++++++++++++++- onnxruntime/lora/lora_format_utils.cc | 32 ++++++++------ onnxruntime/lora/lora_format_utils.h | 22 ++++++++-- 6 files changed, 107 insertions(+), 18 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 29a229f427163..b8f61d2e3d22f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ 
b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -508,6 +508,7 @@ ORT_DEFINE_RELEASE(CustomOpDomain); ORT_DEFINE_RELEASE(ThreadingOptions); ORT_DEFINE_RELEASE(Env); ORT_DEFINE_RELEASE(RunOptions); +ORT_DEFINE_RELEASE(LoraAdapter); ORT_DEFINE_RELEASE(Session); ORT_DEFINE_RELEASE(SessionOptions); ORT_DEFINE_RELEASE(TensorTypeAndShapeInfo); @@ -736,6 +737,15 @@ struct CustomOpDomain : detail::Base { void Add(const OrtCustomOp* op); ///< Wraps CustomOpDomain_Add }; +/// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file +struct LoraAdapter : detail::Base { + /// \brief Wraps OrtApi::CreateLoraAdapter + /// + /// The function attempts to load the adapter from the specified file + /// \param absolute_adapter_path The absolute path to the Lora adapter + explicit LoraAdapter(const std::basic_string& absolute_adapter_path); +}; + /** \brief RunOptions * */ @@ -766,6 +776,8 @@ struct RunOptions : detail::Base { * Wraps OrtApi::RunOptionsUnsetTerminate */ RunOptions& UnsetTerminate(); + + RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); }; namespace detail { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index d3a8cade4d28f..f7290aa610ff4 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,6 +557,10 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } +inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path) { + ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), &p_)); +} + inline RunOptions::RunOptions() { ThrowOnError(GetApi().CreateRunOptions(&p_)); } @@ -609,6 +613,10 @@ inline RunOptions& RunOptions::UnsetTerminate() { return *this; } +inline RunOptions& RunOptions::SetLoraAdapterActive(const LoraAdapter& adapter) { + 
ThrowOnError(GetApi().RunOptionsSetActiveLoraAdapter(p_, adapter)); +} + namespace detail { template diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index e10a577076547..d1b3a56413bee 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -40,5 +40,13 @@ size_t BinaryFormatHolder::GetSize() const { } // namespace details +void LoraAdapter::Load(const std::filesystem::path& file_path) { + binary_format_holder_.Load(file_path); +} + +void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { + binary_format_holder_.MemoryMap(file_path); +} + } // namespace lora } // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 541ae59fbe293..bb887bbe04b85 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -3,9 +3,12 @@ #pragma once +#include "core/common/inlined_containers.h" #include "core/platform/env.h" #include "core/framework/ort_value.h" +#include "lora/lora_format_utils.h" + #include #include #include @@ -43,7 +46,7 @@ class BinaryFormatHolder { void MemoryMap(const std::filesystem::path& file_path); // Get Flatbuffer object pointer - const Adapter* GetParameters() const noexcept { return adapter_; } + const Adapter* GetBinaryAdapter() const noexcept { return adapter_; } // Get the size of the buffer size_t GetSize() const; @@ -62,7 +65,7 @@ class BinaryFormatHolder { }; std::variant buffer_; - const Adapter* adapter_; + const Adapter* adapter_{nullptr}; }; /// @@ -78,5 +81,41 @@ struct LoraParam { } // namespace details +/// +/// Container to hold and access Lora Parameters +/// +class LoraAdapter { + public: + LoraAdapter() = default; + LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter& operator=(const LoraAdapter&) = delete; + ~LoraAdapter() = default; + + LoraAdapter(LoraAdapter&&) = default; + LoraAdapter& operator=(LoraAdapter&&) = default; + + /// + /// Load 
parameters into memory from an adapter file and validates its format. + /// + /// file name that can be opened + void Load(const std::filesystem::path& file_path); + + /// + /// Memory maps adapter file into memory and validates its format. + /// + /// + void MemoryMap(const std::filesystem::path& file_path); + + template + void OutputAdaptersParameters(NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* adapter = binary_format_holder_.GetBinaryAdapter(); + utils::OutputAdaptersParameters(*adapter, names_out, params_out); + } + + private: + details::BinaryFormatHolder binary_format_holder_; +}; + } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 1e2ce1d58d0fe..b9710af4a570e 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -6,6 +6,10 @@ #include "core/common/common.h" #include "core/common/span_utils.h" +#include "core/framework/ortdevice.h" +#include "core/framework/ortmemoryinfo.h" +#include "core/framework/ort_value.h" +#include "core/framework/tensor.h" #include @@ -89,23 +93,25 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); } -// std::pair CreateOrtValueOverFlatBufferLoraParameter( -// const Parameter& tensor) { -// std::string name; -// LoadStringFromLoraFormat(name, tensor.name()); +std::pair CreateOrtValueOverLoraParameter(const Parameter& param) { + OrtValue result; -// const auto data_type = tensor.data_type(); + std::string name; + LoadStringFromLoraFormat(name, param.name()); -// gsl::span shape_span(tensor.dims()->data(), tensor.dims()->size()); + const auto data_type = param.data_type(); + gsl::span shape_span(param.dims()->data(), param.dims()->size()); -// auto mem_info = OrtMemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); -// auto ort_value = -// 
OrtValue::CreateTensor(*mem_info, const_cast(tensor.raw_data()->data()), -// static_cast(tensor.raw_data()->size()), shape_span, -// static_cast(data_type)); -// return std::make_pair(std::move(name), std::move(ort_value)); -// } + OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); + Tensor::InitOrtValue(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), + TensorShape(shape_span), + const_cast(param.raw_data()->data()), + cpu_meminfo, + result); + + return std::make_pair(std::move(name), std::move(result)); +} } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 75b499eb0bef9..c6d8efcab80c9 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -16,6 +16,8 @@ #include #include +struct OrtValue; + namespace onnxruntime { namespace lora { namespace utils { @@ -80,8 +82,22 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// /// /// -// std::pair CreateOrtValueOverFlatBufferLoraParameter( -// const Generators::lora_parameters::Param& tensor); +std::pair CreateOrtValueOverLoraParameter(const Parameter& param); + +template +void OutputAdaptersParameters(const Adapter& adapter, + NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* params = adapter.parameters(); + for (const auto* param : params) { + auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); + *names_out = std::move(name); + ++names_out; + *params_out = std::move(ort_value); + ++params_out; + } +} + } // namespace utils } // namespace lora -} // namespace Generators +} // namespace onnxruntime From 300c982b80288ea57b8b588530b162ffd8f05678 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 3 Sep 2024 14:34:49 -0700 Subject: [PATCH 22/84] Implement LoraAdapter and public APIs --- .../onnxruntime/core/framework/run_options.h | 6 +- 
onnxruntime/core/framework/run_options.cc | 24 +++-- onnxruntime/lora/lora_adapters.cc | 44 ++++---- onnxruntime/lora/lora_adapters.h | 101 +++++++----------- 4 files changed, 83 insertions(+), 92 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index ed506ed815dec..aa741af0f1643 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -9,7 +9,9 @@ #include "core/framework/config_options.h" namespace onnxruntime { -struct OrtLoraAdapter; +namespace lora { +class LoraAdapter; +} } // namespace onnxruntime /** @@ -44,7 +46,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - std::vector active_adapters_; + std::vector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 65f36f07c4847..00e2a17d60df5 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -64,18 +64,26 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, return onnxruntime::ToOrtStatus(options->config_options.AddConfigEntry(config_key, config_value)); } -ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions*, const _In_ OrtLoraAdapter*) { - // Need cast to the real type - // options->active_adapters_.push_back(adapter); +ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, + const _In_ OrtLoraAdapter* adapter) { + API_IMPL_BEGIN + auto* lora_adapter = reinterpret_cast(adapter); + options->active_adapters_.push_back(lora_adapter); return nullptr; + API_IMPL_END } -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T*, _Outptr_ OrtLoraAdapter**) { - //auto adapter = new 
onnxruntime::OrtLoraAdapter(adapter_file_path); - // *out = adapter.release(); +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, + _Outptr_ OrtLoraAdapter** adapter) { + API_IMPL_BEGIN + auto lora_adapter = std::make_unique(); + // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) + lora_adapter->Load(adapter_file_path); + *adapter = reinterpret_cast(lora_adapter.release()); return nullptr; + API_IMPL_END } -ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*) { - // delete reinterpret_cast(adapter); +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { + delete reinterpret_cast(adapter); } diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index d1b3a56413bee..2db4eff754428 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -9,43 +9,49 @@ namespace onnxruntime { namespace lora { -namespace details { -LoraParam::LoraParam(std::string name, OrtValue ort_value) +LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept : name_(std::move(name)), ort_value_(std::move(ort_value)) {} -BinaryFormatHolder::~BinaryFormatHolder() = default; - -void BinaryFormatHolder::Load(const std::filesystem::path& file_path) { +void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); + + InitializeParamsValues(); } -void BinaryFormatHolder::MemoryMap(const std::filesystem::path& file_path) { +void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); buffer_.emplace(std::move(mapped_memory), 
file_size); -} -size_t BinaryFormatHolder::GetSize() const { - if (std::holds_alternative(buffer_)) { - return std::get<0>(buffer_).file_size_; - } else if (std::holds_alternative(buffer_)) { - return std::get<1>(buffer_).buffer_.size(); - } - ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); + InitializeParamsValues(); } -} // namespace details +void LoraAdapter::InitializeParamsValues() { + if (adapter_ == nullptr) { + ORT_THROW("Adapter is not loaded yet."); + } -void LoraAdapter::Load(const std::filesystem::path& file_path) { - binary_format_holder_.Load(file_path); + const auto* params = adapter_->parameters(); + InlinedHashMap params_values; + params_values.reserve(params->size()); + for (const auto* param : *params) { + auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); + params_values.emplace(name, LoraParam(std::move(name), std::move(ort_value))); + } + params_values_.swap(params_values); } -void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { - binary_format_holder_.MemoryMap(file_path); +size_t LoraAdapter::GetSize() const { + if (std::holds_alternative(buffer_)) { + return std::get<1>(buffer_).file_size_; + } else if (std::holds_alternative(buffer_)) { + return std::get<2>(buffer_).buffer_.size(); + } + ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); } } // namespace lora diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index bb887bbe04b85..066f066ed39f7 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -3,6 +3,7 @@ #pragma once +#include "core/common/common.h" #include "core/common/inlined_containers.h" #include "core/platform/env.h" #include "core/framework/ort_value.h" @@ -17,24 +18,20 @@ namespace onnxruntime { namespace lora { -struct Adapter; - -namespace details { -// This class takes hold of the serialized parameters that -// are either loaded from disk or mapped from disk (coming in the future) 
-// This data is always in host memory. -class BinaryFormatHolder { +/// +/// Container to hold and access Lora Parameters +/// +class LoraAdapter { public: - BinaryFormatHolder() = default; - BinaryFormatHolder(const BinaryFormatHolder&) = delete; - BinaryFormatHolder& operator=(const BinaryFormatHolder&) = delete; - ~BinaryFormatHolder(); + LoraAdapter() = default; + ~LoraAdapter() = default; + ORT_DISALLOW_COPY_AND_ASSIGNMENT(LoraAdapter); - BinaryFormatHolder(BinaryFormatHolder&&) = default; - BinaryFormatHolder& operator=(BinaryFormatHolder&&) = default; + LoraAdapter(LoraAdapter&&) = default; + LoraAdapter& operator=(LoraAdapter&&) = default; /// - /// Load parameters from an adapter file and validates its format. + /// Load parameters into memory from an adapter file and validates its format. /// /// file name that can be opened void Load(const std::filesystem::path& file_path); @@ -45,13 +42,27 @@ class BinaryFormatHolder { /// void MemoryMap(const std::filesystem::path& file_path); - // Get Flatbuffer object pointer - const Adapter* GetBinaryAdapter() const noexcept { return adapter_; } + /// + /// Outputs the names and tensor values of the parameters to the + /// specified output iterators + /// + /// output iterator accepting const char* + /// Output Iterator accepting OrtValue + /// + /// + template + void OutputAdaptersParameters(NamesOutputIter names_out, + TensorOutputIter params_out) { + const auto* adapter = binary_format_holder_.GetBinaryAdapter(); + // utils::OutputAdaptersParameters(*adapter, names_out, params_out); + } + + private: + void InitializeParamsValues(); // Get the size of the buffer size_t GetSize() const; - private: struct BufferHolder { explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} std::vector buffer_; @@ -64,57 +75,21 @@ class BinaryFormatHolder { size_t file_size_; }; - std::variant buffer_; - const Adapter* adapter_{nullptr}; -}; - -/// -/// Represents a named lora parameter (tensor) -/// -struct 
LoraParam { - LoraParam() = default; - LoraParam(std::string name, OrtValue parameter); - - std::string name_; - OrtValue ort_value_; -}; - -} // namespace details + std::variant buffer_; /// -/// Container to hold and access Lora Parameters -/// -class LoraAdapter { - public: - LoraAdapter() = default; - LoraAdapter(const LoraAdapter&) = delete; - LoraAdapter& operator=(const LoraAdapter&) = delete; - ~LoraAdapter() = default; - - LoraAdapter(LoraAdapter&&) = default; - LoraAdapter& operator=(LoraAdapter&&) = default; - - /// - /// Load parameters into memory from an adapter file and validates its format. - /// - /// file name that can be opened - void Load(const std::filesystem::path& file_path); - - /// - /// Memory maps adapter file into memory and validates its format. + /// Represents a named lora parameter (tensor) /// - /// - void MemoryMap(const std::filesystem::path& file_path); + struct LoraParam { + LoraParam() = default; + LoraParam(std::string name, OrtValue parameter) noexcept; - template - void OutputAdaptersParameters(NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* adapter = binary_format_holder_.GetBinaryAdapter(); - utils::OutputAdaptersParameters(*adapter, names_out, params_out); - } + std::string name_; + OrtValue ort_value_; + }; - private: - details::BinaryFormatHolder binary_format_holder_; + const Adapter* adapter_{nullptr}; + InlinedHashMap params_values_; }; } // namespace lora From 8cc1d9dec316797ae2fac54ff452a1ae63c4d82c Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 3 Sep 2024 15:36:29 -0700 Subject: [PATCH 23/84] Move Release to create --- cmake/onnxruntime.cmake | 1 + onnxruntime/core/framework/run_options.cc | 15 --------------- onnxruntime/lora/lora_adapters.cc | 21 ++++++++++++++++++++- onnxruntime/lora/lora_adapters.h | 3 ++- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 927b4ac84b037..a946f17e2dfc2 100644 --- 
a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -207,6 +207,7 @@ set(onnxruntime_INTERNAL_LIBRARIES onnxruntime_optimizer onnxruntime_providers ${onnxruntime_tvm_libs} + onnxruntime_lora onnxruntime_framework onnxruntime_graph onnxruntime_util diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 00e2a17d60df5..b0ea7f7c9d843 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -72,18 +72,3 @@ ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptio return nullptr; API_IMPL_END } - -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, - _Outptr_ OrtLoraAdapter** adapter) { - API_IMPL_BEGIN - auto lora_adapter = std::make_unique(); - // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) - lora_adapter->Load(adapter_file_path); - *adapter = reinterpret_cast(lora_adapter.release()); - return nullptr; - API_IMPL_END -} - -ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { - delete reinterpret_cast(adapter); -} diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 2db4eff754428..d089d7b48aa14 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -4,6 +4,10 @@ #include "lora_adapters.h" #include "lora_format_utils.h" +#include "core/session/onnxruntime_c_api.h" +#include "core/session/ort_apis.h" +#include "core/framework/error_code_helper.h" + #include #include @@ -55,4 +59,19 @@ size_t LoraAdapter::GetSize() const { } } // namespace lora -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime + +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, + _Outptr_ OrtLoraAdapter** adapter) { + API_IMPL_BEGIN + auto lora_adapter = std::make_unique(); + // For platforms that do not support Memmap, we can #ifdef 
it to ->Load(adapter_file_path) + lora_adapter->Load(adapter_file_path); + *adapter = reinterpret_cast(lora_adapter.release()); + return nullptr; + API_IMPL_END +} + +ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { + delete reinterpret_cast(adapter); +} diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 066f066ed39f7..c373f7cbf6dbe 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,7 +25,8 @@ class LoraAdapter { public: LoraAdapter() = default; ~LoraAdapter() = default; - ORT_DISALLOW_COPY_AND_ASSIGNMENT(LoraAdapter); + LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter& operator=(const LoraAdapter&) = delete; LoraAdapter(LoraAdapter&&) = default; LoraAdapter& operator=(LoraAdapter&&) = default; From 79e62bc454265ccb398b1e4c7d2cc3122e5078cc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:38:20 -0700 Subject: [PATCH 24/84] Implement unit test --- cmake/onnxruntime_unittests.cmake | 7 +- onnxruntime/lora/lora_adapters.cc | 2 +- onnxruntime/lora/lora_adapters.h | 38 ++++--- onnxruntime/lora/lora_format_utils.h | 14 --- onnxruntime/test/lora/lora_test.cc | 105 ++++++++++++++++++ .../testdata/lora/lora_unit_test_adapter.fb | Bin 0 -> 432 bytes 6 files changed, 137 insertions(+), 29 deletions(-) create mode 100644 onnxruntime/test/lora/lora_test.cc create mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 533a15cc641bd..0bc2250e2ef8e 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -263,6 +263,11 @@ file(GLOB onnxruntime_test_flatbuffers_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/flatbuffers/*.h" ) +file(GLOB onnxruntime_test_lora_src CONFIGURE_DEPENDS + "${TEST_SRC_DIR}/lora/*.cc" + "${TEST_SRC_DIR}/lora/*.h" +) + if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) 
file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS @@ -783,7 +788,7 @@ endif() set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantization_src} - ${onnxruntime_test_flatbuffers_src}) + ${onnxruntime_test_flatbuffers_src} ${onnxruntime_test_lora_src}) if (onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS) file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index d089d7b48aa14..e2214a15d2563 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -49,7 +49,7 @@ void LoraAdapter::InitializeParamsValues() { params_values_.swap(params_values); } -size_t LoraAdapter::GetSize() const { +size_t LoraAdapter::GetBufferSize() const { if (std::holds_alternative(buffer_)) { return std::get<1>(buffer_).file_size_; } else if (std::holds_alternative(buffer_)) { diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index c373f7cbf6dbe..a943e92175030 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -25,7 +25,7 @@ class LoraAdapter { public: LoraAdapter() = default; ~LoraAdapter() = default; - LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter(const LoraAdapter&) = delete; LoraAdapter& operator=(const LoraAdapter&) = delete; LoraAdapter(LoraAdapter&&) = default; @@ -44,25 +44,37 @@ class LoraAdapter { void MemoryMap(const std::filesystem::path& file_path); /// - /// Outputs the names and tensor values of the parameters to the - /// specified output iterators + /// Returns number of parameters in the adapter. + /// The number is expected to be even as lora params come in pairs. 
/// - /// output iterator accepting const char* - /// Output Iterator accepting OrtValue - /// - /// + /// size of params_values_ container + size_t GetParamNum() const { + return params_values_.size(); + } + + /// + /// Outputs Lora Parameters, their names and values + /// into the supplied output iterators. + /// + /// + /// + /// output iterator that accepts const char* + /// output iterator that accepts OrtValue template void OutputAdaptersParameters(NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* adapter = binary_format_holder_.GetBinaryAdapter(); - // utils::OutputAdaptersParameters(*adapter, names_out, params_out); + TensorOutputIter tensor_out) const { + for (const auto& [name, param] : params_values_) { + *names_out = name.c_str(); + ++names_out; + *tensor_out = param.ort_value_; + ++tensor_out; + } } private: - void InitializeParamsValues(); // Get the size of the buffer - size_t GetSize() const; + size_t GetBufferSize() const; struct BufferHolder { explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} @@ -78,7 +90,7 @@ class LoraAdapter { std::variant buffer_; -/// + /// /// Represents a named lora parameter (tensor) /// struct LoraParam { diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index c6d8efcab80c9..e5587fd730925 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -84,20 +84,6 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// std::pair CreateOrtValueOverLoraParameter(const Parameter& param); -template -void OutputAdaptersParameters(const Adapter& adapter, - NamesOutputIter names_out, - TensorOutputIter params_out) { - const auto* params = adapter.parameters(); - for (const auto* param : params) { - auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); - *names_out = std::move(name); - ++names_out; - *params_out = std::move(ort_value); - ++params_out; - } -} - } 
// namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc new file mode 100644 index 0000000000000..832056f35870e --- /dev/null +++ b/onnxruntime/test/lora/lora_test.cc @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "lora/lora_adapters.h" +#include "lora/lora_format_version.h" +#include "lora/lora_format_utils.h" +#include "gtest/gtest.h" + +#include + +namespace onnxruntime { +namespace test { + +// TEST(LoraFormatTest, CreateAdapter) { +// // Generate a random sequence of floats +// // shape = {8, 4} +// constexpr std::array shape = {8, 4}; +// std::vector param_1(32); +// std::iota(param_1.begin(), param_1.end(), 0.0f); +// +// std::vector param_2(32); +// std::iota(param_2.begin(), param_2.end(), 33.0f); +// +// flatbuffers::FlatBufferBuilder builder; +// std::vector> params; +// params.reserve(2); +// flatbuffers::Offset fbs_param_1, fbs_param_2; +// auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); +// lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType_FLOAT, shape, +// byte_span, fbs_param_1); +// params.push_back(fbs_param_1); +// +// byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); +// lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType_FLOAT, shape, +// byte_span, fbs_param_2); +// params.push_back(fbs_param_2); +// +// auto fbs_params = builder.CreateVector(params); +// auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); +// builder.Finish(fbs_adapter, lora::AdapterIdentifier()); +// +// constexpr const char* const file_name = +// "D:/dmitrism/Downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; +// std::ofstream file(file_name, std::ios::binary); +// ASSERT_TRUE(file.is_open()); +// +// ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), 
builder.GetSize()).fail()); +// ASSERT_FALSE(file.flush().fail()); +// file.close(); +// } + +TEST(LoraAdapterTest, Load) { + // XXX: put this into test directory + const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; + + auto verify_load = [](const lora::LoraAdapter& adapter) { + const auto param_num = adapter.GetParamNum(); + ASSERT_GE(param_num, 0U); + + std::vector names; + std::vector ort_values; + names.reserve(param_num); + ort_values.reserve(param_num); + + adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); + ASSERT_EQ(param_num, names.size()); + ASSERT_EQ(param_num, ort_values.size()); + + for (size_t i = 0; i < param_num; ++i) { + const auto& name = names[i]; + const auto& ort_value = ort_values[i]; + ASSERT_TRUE(name != nullptr); + ASSERT_TRUE(ort_value.IsTensor()); + + const auto& tensor = ort_value.Get(); + ASSERT_EQ(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT); + + const auto shape = tensor.Shape().GetDims(); + ASSERT_EQ(2, shape.size()); + ASSERT_EQ(8, shape[0]); + ASSERT_EQ(4, shape[1]); + + // Read all the elements to make sure they are accessible + const auto data = tensor.DataAsSpan(); + for (size_t j = 0, lim = data.size(); j < lim; ++j) { + ASSERT_EQ(static_cast(j), data[j]); + } + } + }; + + { + lora::LoraAdapter lora_adapter; + lora_adapter.Load(file_path); + verify_load(lora_adapter); + } + + { + lora::LoraAdapter lora_adapter; + lora_adapter.MemoryMap(file_path); + verify_load(lora_adapter); + } +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb new file mode 100644 index 0000000000000000000000000000000000000000..af696646efe6704d8568af6ec7a2cd7668906c4f GIT binary patch literal 432 zcmaLSEl2}l7zgmjL-4|dg$09QVZmUSVlW8a#z2LI1%p9w1A{@~5QD*R(YR<_G#HG= zMdPAz(P*@tzw@JsaKC&0ydS*JlSRaS_i&&@yQ-=s_1ChTFsIIa`;Y2``b@o6BK0Ct 
zG8!~lWsMeX*4d!LHaqN+)8~KzLykD+gb`<)b3ws1H{4S85*?&peSG)y6Jtdv##iIZ z)2+y&sozv>5b^gM_04k6FZWS0WyXT&nvWJ8a)yj3D7j<8JyRZd;+YvQ%z0(O8}EE{ L6A%19$7Xy2-Xc2# literal 0 HcmV?d00001 From bc86fabda0cf89d52f371ae322bb37c922cf1242 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:42:45 -0700 Subject: [PATCH 25/84] Add test data creation code --- onnxruntime/test/lora/lora_test.cc | 40 +-------------- .../testdata/lora/lora_unit_test_adapter.cc | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+), 39 deletions(-) create mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 832056f35870e..96dce52a90652 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -11,46 +11,8 @@ namespace onnxruntime { namespace test { -// TEST(LoraFormatTest, CreateAdapter) { -// // Generate a random sequence of floats -// // shape = {8, 4} -// constexpr std::array shape = {8, 4}; -// std::vector param_1(32); -// std::iota(param_1.begin(), param_1.end(), 0.0f); -// -// std::vector param_2(32); -// std::iota(param_2.begin(), param_2.end(), 33.0f); -// -// flatbuffers::FlatBufferBuilder builder; -// std::vector> params; -// params.reserve(2); -// flatbuffers::Offset fbs_param_1, fbs_param_2; -// auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); -// lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType_FLOAT, shape, -// byte_span, fbs_param_1); -// params.push_back(fbs_param_1); -// -// byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); -// lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType_FLOAT, shape, -// byte_span, fbs_param_2); -// params.push_back(fbs_param_2); -// -// auto fbs_params = builder.CreateVector(params); -// auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); -// builder.Finish(fbs_adapter, lora::AdapterIdentifier()); -// -// 
constexpr const char* const file_name = -// "D:/dmitrism/Downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; -// std::ofstream file(file_name, std::ios::binary); -// ASSERT_TRUE(file.is_open()); -// -// ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()).fail()); -// ASSERT_FALSE(file.flush().fail()); -// file.close(); -// } - TEST(LoraAdapterTest, Load) { - // XXX: put this into test directory + // See file creation code at testdata/lora/lora_unit_test_adapter.cc const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; auto verify_load = [](const lora::LoraAdapter& adapter) { diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc new file mode 100644 index 0000000000000..1bd714de9ba85 --- /dev/null +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "lora/lora_format_version.h" +#include "lora/lora_format_utils.h" +#include "gtest/gtest.h" + +#include + +namespace onnxruntime { +namespace test { + +TEST(LoraFormatTest, CreateAdapter) { + // generate a random sequence of floats + // shape = {8, 4} + constexpr std::array shape = {8, 4}; + std::vector param_1(32); + std::iota(param_1.begin(), param_1.end(), 0.0f); + + std::vector param_2(32); + std::iota(param_2.begin(), param_2.end(), 33.0f); + + flatbuffers::flatbufferbuilder builder; + std::vector> params; + params.reserve(2); + flatbuffers::offset fbs_param_1, fbs_param_2; + auto byte_span = reinterpretasspan(gsl::make_span(param_1)); + lora::utils::saveloraparameter(builder, "param_1", lora::tensordatatype_float, shape, + byte_span, fbs_param_1); + params.push_back(fbs_param_1); + + byte_span = reinterpretasspan(gsl::make_span(param_2)); + lora::utils::saveloraparameter(builder, "param_2", lora::tensordatatype_float, shape, + byte_span, fbs_param_2); + params.push_back(fbs_param_2); + + auto fbs_params = builder.createvector(params); + auto fbs_adapter = lora::createadapter(builder, lora::kloraformatversion, 1, 1, fbs_params); + builder.finish(fbs_adapter, lora::adapteridentifier()); + + constexpr const char* const file_name = + "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; + std::ofstream file(file_name, std::ios::binary); + assert_true(file.is_open()); + + assert_false(file.write(reinterpret_cast(builder.getbufferpointer()), builder.getsize()).fail()); + assert_false(file.flush().fail()); + file.close(); +} +} +} \ No newline at end of file From aab98b01b0e4191279128a73acac7a16790200ec Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 11:45:40 -0700 Subject: [PATCH 26/84] Use inlined vector --- onnxruntime/test/lora/lora_test.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 
96dce52a90652..e4a2b02a03bc2 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "core/common/inlined_containers_fwd.h" #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" @@ -19,8 +20,8 @@ TEST(LoraAdapterTest, Load) { const auto param_num = adapter.GetParamNum(); ASSERT_GE(param_num, 0U); - std::vector names; - std::vector ort_values; + InlinedVector names; + InlinedVector ort_values; names.reserve(param_num); ort_values.reserve(param_num); From 138ab0da8b6bf300b7641c623284bddbeb783f27 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Sep 2024 14:10:54 -0700 Subject: [PATCH 27/84] Add vector forced alignemtn --- onnxruntime/lora/lora_format/README.md | 2 +- onnxruntime/lora/lora_format/lora_schema.fbs | 2 +- .../lora/lora_format/lora_schema.fbs.h | 97 ++++++++++--------- onnxruntime/lora/lora_format_utils.cc | 4 +- .../testdata/lora/lora_unit_test_adapter.cc | 29 +++--- 5 files changed, 69 insertions(+), 65 deletions(-) diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md index ec39974464c1c..d28f47186cbea 100644 --- a/onnxruntime/lora/lora_format/README.md +++ b/onnxruntime/lora/lora_format/README.md @@ -20,7 +20,7 @@ It is possible to use another flatc as well, e.g., from a separate installation. To update the flatbuffers schemas and generated files: 1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). -2. Run [compile_schema.py](./compile_schema.py) to generate the C++ and Python bindings. +2. Run [compile_schema.py](./compile_schema.py) to generate the C++ bindings. 
``` python onnxruntime/lora/lora_format/compile_schema.py --flatc diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs index 073fe0945517d..37e8195dab6f2 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ b/onnxruntime/lora/lora_format/lora_schema.fbs @@ -37,7 +37,7 @@ table Parameter { dims:[int64]; data_type:TensorDataType; - raw_data:[uint8]; + raw_data:[uint8] (force_align : 8); } table Adapter { diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h index 72b27dd355814..a75082af811fc 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -22,55 +22,55 @@ struct ParameterBuilder; struct Adapter; struct AdapterBuilder; -enum TensorDataType : int32_t { - TensorDataType_UNDEFINED = 0, - TensorDataType_FLOAT = 1, - TensorDataType_UINT8 = 2, - TensorDataType_INT8 = 3, - TensorDataType_UINT16 = 4, - TensorDataType_INT16 = 5, - TensorDataType_INT32 = 6, - TensorDataType_INT64 = 7, - TensorDataType_STRING = 8, - TensorDataType_BOOL = 9, - TensorDataType_FLOAT16 = 10, - TensorDataType_DOUBLE = 11, - TensorDataType_UINT32 = 12, - TensorDataType_UINT64 = 13, - TensorDataType_COMPLEX64 = 14, - TensorDataType_COMPLEX128 = 15, - TensorDataType_BFLOAT16 = 16, - TensorDataType_FLOAT8E4M3FN = 17, - TensorDataType_FLOAT8E4M3FNUZ = 18, - TensorDataType_FLOAT8E5M2 = 19, - TensorDataType_FLOAT8E5M2FNUZ = 20, - TensorDataType_MIN = TensorDataType_UNDEFINED, - TensorDataType_MAX = TensorDataType_FLOAT8E5M2FNUZ +enum class TensorDataType : int32_t { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16, + FLOAT8E4M3FN = 17, + FLOAT8E4M3FNUZ = 18, + FLOAT8E5M2 = 19, + FLOAT8E5M2FNUZ = 20, + MIN = UNDEFINED, + MAX = 
FLOAT8E5M2FNUZ }; inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType_UNDEFINED, - TensorDataType_FLOAT, - TensorDataType_UINT8, - TensorDataType_INT8, - TensorDataType_UINT16, - TensorDataType_INT16, - TensorDataType_INT32, - TensorDataType_INT64, - TensorDataType_STRING, - TensorDataType_BOOL, - TensorDataType_FLOAT16, - TensorDataType_DOUBLE, - TensorDataType_UINT32, - TensorDataType_UINT64, - TensorDataType_COMPLEX64, - TensorDataType_COMPLEX128, - TensorDataType_BFLOAT16, - TensorDataType_FLOAT8E4M3FN, - TensorDataType_FLOAT8E4M3FNUZ, - TensorDataType_FLOAT8E5M2, - TensorDataType_FLOAT8E5M2FNUZ + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ }; return values; } @@ -104,7 +104,7 @@ inline const char * const *EnumNamesTensorDataType() { } inline const char *EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType_UNDEFINED, TensorDataType_FLOAT8E5M2FNUZ)) return ""; + if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; } @@ -173,7 +173,7 @@ inline ::flatbuffers::Offset CreateParameter( ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora::TensorDataType data_type = 
onnxruntime::lora::TensorDataType_UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { ParameterBuilder builder_(_fbb); builder_.add_raw_data(raw_data); @@ -187,10 +187,11 @@ inline ::flatbuffers::Offset CreateParameterDirect( ::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, const std::vector *dims = nullptr, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType_UNDEFINED, + onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, const std::vector *raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; + if (raw_data) { _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); } auto raw_data__ = raw_data ? _fbb.CreateVector(*raw_data) : 0; return onnxruntime::lora::CreateParameter( _fbb, diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index b9710af4a570e..75604dd62cf4d 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -104,7 +104,9 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); - Tensor::InitOrtValue(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), + auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); + // const_cast is necessery due to Tensor class API + Tensor::InitOrtValue(elem_type, TensorShape(shape_span), const_cast(param.raw_data()->data()), cpu_meminfo, diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc index 1bd714de9ba85..a72d2a77c4325 100644 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -10,7 
+10,7 @@ namespace onnxruntime { namespace test { -TEST(LoraFormatTest, CreateAdapter) { +EST(LoraFormatTest, CreateAdapter) { // generate a random sequence of floats // shape = {8, 4} constexpr std::array shape = {8, 4}; @@ -20,32 +20,33 @@ TEST(LoraFormatTest, CreateAdapter) { std::vector param_2(32); std::iota(param_2.begin(), param_2.end(), 33.0f); - flatbuffers::flatbufferbuilder builder; - std::vector> params; + flatbuffers::FlatBufferBuilder builder; + std::vector> params; params.reserve(2); - flatbuffers::offset fbs_param_1, fbs_param_2; - auto byte_span = reinterpretasspan(gsl::make_span(param_1)); - lora::utils::saveloraparameter(builder, "param_1", lora::tensordatatype_float, shape, + flatbuffers::Offset fbs_param_1, fbs_param_2; + auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); + lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType::FLOAT, shape, byte_span, fbs_param_1); params.push_back(fbs_param_1); - byte_span = reinterpretasspan(gsl::make_span(param_2)); - lora::utils::saveloraparameter(builder, "param_2", lora::tensordatatype_float, shape, + byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); + lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType::FLOAT, shape, byte_span, fbs_param_2); params.push_back(fbs_param_2); - auto fbs_params = builder.createvector(params); - auto fbs_adapter = lora::createadapter(builder, lora::kloraformatversion, 1, 1, fbs_params); - builder.finish(fbs_adapter, lora::adapteridentifier()); + auto fbs_params = builder.CreateVector(params); + auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); + builder.Finish(fbs_adapter, lora::AdapterIdentifier()); constexpr const char* const file_name = "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; std::ofstream file(file_name, std::ios::binary); - assert_true(file.is_open()); + ASSERT_TRUE(file.is_open()); - 
assert_false(file.write(reinterpret_cast(builder.getbufferpointer()), builder.getsize()).fail()); - assert_false(file.flush().fail()); + ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()).fail()); + ASSERT_FALSE(file.flush().fail()); file.close(); } + } } \ No newline at end of file From e86bd0d3ed3b78cada788df9b442a9350750e636 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 11:30:28 -0700 Subject: [PATCH 28/84] Add Load --- onnxruntime/lora/lora_adapters.cc | 5 +- onnxruntime/lora/lora_adapters.h | 30 ++++ onnxruntime/test/lora/lora_test.cc | 151 +++++++++++++----- .../testdata/lora/lora_unit_test_adapter.cc | 4 + 4 files changed, 153 insertions(+), 37 deletions(-) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index e2214a15d2563..45b27365a116f 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -19,9 +19,12 @@ LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); + Load(std::move(buffer)); +} + +void LoraAdapter::Load(std::vector buffer) { adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); - InitializeParamsValues(); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index a943e92175030..45519e4081cd3 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -37,6 +37,12 @@ class LoraAdapter { /// file name that can be opened void Load(const std::filesystem::path& file_path); + /// + /// Load parameters from serialized bytes and validates its format. + /// + /// + void Load(std::vector buffer); + /// /// Memory maps adapter file into memory and validates its format. 
/// @@ -52,6 +58,30 @@ class LoraAdapter { return params_values_.size(); } + /// + /// Gets lora format version + /// + /// + int LoraFormatVersion() const noexcept { + return adapter_->format_version(); + } + + /// + /// Gets adapter version + /// + /// + int AdapterVersion() const noexcept { + return adapter_->adapter_version(); + } + + /// + /// Gets model version for which the adapter was created + /// + /// + int ModelVersion() const noexcept { + return adapter_->model_version(); + } + /// /// Outputs Lora Parameters, their names and values /// into the supplied output iterators. diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index e4a2b02a03bc2..7a62940820618 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -2,62 +2,141 @@ // Licensed under the MIT License. #include "core/common/inlined_containers_fwd.h" +#include "core/framework/data_types_internal.h" #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" #include "gtest/gtest.h" -#include +#include namespace onnxruntime { namespace test { +namespace { + +constexpr const int kAdapterVersion = 1; +constexpr const int kModelVersion = 1; + +template +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(std::isnan(data[i])); + ASSERT_TRUE(std::isfinite(data[i])); + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + 
ASSERT_FALSE(std::isnan(data[i])); + ASSERT_TRUE(std::isfinite(data[i])); + ASSERT_EQ(static_cast(i), data[i]); + } + } +}; + + +template<> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(data[i].IsNaN()); + ASSERT_FALSE(data[i].IsInfinity()); + ASSERT_EQ(static_cast(i), data[i].ToFloat()); + } + } +}; + +template <> +struct ReadAndValidateData { + void operator()(const Tensor& parameter) const { + auto data = parameter.DataAsSpan(); + for (size_t i = 0, size = data.size(); i < size; ++i) { + ASSERT_FALSE(data[i].IsNaN()); + ASSERT_FALSE(data[i].IsInfinity()); + ASSERT_EQ(static_cast(i), data[i].ToFloat()); + } + } +}; + +auto verify_load = [](const lora::LoraAdapter& adapter) { + ASSERT_EQ(kAdapterVersion, adapter.AdapterVersion()); + ASSERT_EQ(kModelVersion, adapter.ModelVersion()); + + const auto param_num = adapter.GetParamNum(); + ASSERT_GE(param_num, 0U); + + InlinedVector names; + InlinedVector ort_values; + names.reserve(param_num); + ort_values.reserve(param_num); + + adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); + ASSERT_EQ(param_num, names.size()); + ASSERT_EQ(param_num, ort_values.size()); + + for (size_t i = 0; i < param_num; ++i) { + const auto& name = names[i]; + const auto& ort_value = ort_values[i]; + ASSERT_TRUE(name != nullptr); + ASSERT_TRUE(ort_value.IsTensor()); + + const auto& tensor = ort_value.Get(); + ASSERT_NE(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); + + const auto shape = tensor.Shape().GetDims(); + ASSERT_EQ(2, shape.size()); + ASSERT_EQ(8, shape[0]); + ASSERT_EQ(4, shape[1]); + + // Read all the elements to make sure they are accessible + // only on CPU + const auto& mem_info = tensor.Location(); + if (mem_info.device.Type() == OrtDevice::CPU) { + utils::MLTypeCallDispatcher + disp(tensor.GetElementType()); + 
disp.Invoke(tensor); + } + } +}; + +} // namespace + TEST(LoraAdapterTest, Load) { // See file creation code at testdata/lora/lora_unit_test_adapter.cc + // This is float const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; - auto verify_load = [](const lora::LoraAdapter& adapter) { - const auto param_num = adapter.GetParamNum(); - ASSERT_GE(param_num, 0U); - - InlinedVector names; - InlinedVector ort_values; - names.reserve(param_num); - ort_values.reserve(param_num); - - adapter.OutputAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); - ASSERT_EQ(param_num, names.size()); - ASSERT_EQ(param_num, ort_values.size()); - - for (size_t i = 0; i < param_num; ++i) { - const auto& name = names[i]; - const auto& ort_value = ort_values[i]; - ASSERT_TRUE(name != nullptr); - ASSERT_TRUE(ort_value.IsTensor()); - - const auto& tensor = ort_value.Get(); - ASSERT_EQ(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT); - - const auto shape = tensor.Shape().GetDims(); - ASSERT_EQ(2, shape.size()); - ASSERT_EQ(8, shape[0]); - ASSERT_EQ(4, shape[1]); - - // Read all the elements to make sure they are accessible - const auto data = tensor.DataAsSpan(); - for (size_t j = 0, lim = data.size(); j < lim; ++j) { - ASSERT_EQ(static_cast(j), data[j]); - } - } - }; - { + // Test memory load lora::LoraAdapter lora_adapter; lora_adapter.Load(file_path); verify_load(lora_adapter); } { + // Test memory map lora::LoraAdapter lora_adapter; lora_adapter.MemoryMap(file_path); verify_load(lora_adapter); diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc index a72d2a77c4325..b185ba676cb73 100644 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc @@ -10,6 +10,10 @@ namespace onnxruntime { namespace test { +constexpr const int kAdapterVersion = 1; +constexpr const int kModelVersion = 1; 
+ + EST(LoraFormatTest, CreateAdapter) { // generate a random sequence of floats // shape = {8, 4} From 1e47b50fc905b00a8eab9b2a58e74a868b826c14 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 13:41:18 -0700 Subject: [PATCH 29/84] Make test in memory --- onnxruntime/test/lora/lora_test.cc | 103 ++++++++++++++---- .../testdata/lora/lora_unit_test_adapter.cc | 56 ---------- .../testdata/lora/lora_unit_test_adapter.fb | Bin 432 -> 0 bytes 3 files changed, 83 insertions(+), 76 deletions(-) delete mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc delete mode 100644 onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 7a62940820618..9015043554e80 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -3,6 +3,8 @@ #include "core/common/inlined_containers_fwd.h" #include "core/framework/data_types_internal.h" +#include "core/framework/to_tensor_proto_element_type.h" + #include "lora/lora_adapters.h" #include "lora/lora_format_version.h" #include "lora/lora_format_utils.h" @@ -22,7 +24,7 @@ template struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_EQ(static_cast(i), data[i]); } } @@ -32,7 +34,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_FALSE(std::isnan(data[i])); ASSERT_TRUE(std::isfinite(data[i])); ASSERT_EQ(static_cast(i), data[i]); @@ -44,7 +46,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - 
for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0]), size = data.size(); i < size; ++i) { ASSERT_FALSE(std::isnan(data[i])); ASSERT_TRUE(std::isfinite(data[i])); ASSERT_EQ(static_cast(i), data[i]); @@ -52,12 +54,11 @@ struct ReadAndValidateData { } }; - -template<> +template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0].ToFloat()), size = data.size(); i < size; ++i) { ASSERT_FALSE(data[i].IsNaN()); ASSERT_FALSE(data[i].IsInfinity()); ASSERT_EQ(static_cast(i), data[i].ToFloat()); @@ -69,7 +70,7 @@ template <> struct ReadAndValidateData { void operator()(const Tensor& parameter) const { auto data = parameter.DataAsSpan(); - for (size_t i = 0, size = data.size(); i < size; ++i) { + for (size_t i = static_cast(data[0].ToFloat()), size = data.size(); i < size; ++i) { ASSERT_FALSE(data[i].IsNaN()); ASSERT_FALSE(data[i].IsInfinity()); ASSERT_EQ(static_cast(i), data[i].ToFloat()); @@ -121,26 +122,88 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { } }; -} // namespace +constexpr const std::array param_shape = {8, 4}; -TEST(LoraAdapterTest, Load) { - // See file creation code at testdata/lora/lora_unit_test_adapter.cc - // This is float - const std::filesystem::path file_path = "testdata/lora/lora_unit_test_adapter.fb"; +template +struct CreateParam { + InlinedVector operator()() const { + InlinedVector param(32); + std::iota(param.begin(), param.end(), T{0}); + return param; + } +}; - { - // Test memory load - lora::LoraAdapter lora_adapter; - lora_adapter.Load(file_path); - verify_load(lora_adapter); +template +struct GenerateTestParameters { + std::vector operator()() const { + constexpr const auto data_type = utils::ToTensorProtoElementType(); + + InlinedVector param_1(32); + InlinedVector param_2(32); + if constexpr (std::is_same::value || 
std::is_same::value) { + for (float f = 0.f; f < 32; ++f) { + param_1[static_cast(f)] = static_cast(f); + param_2[static_cast(f)] = static_cast(f + 32); + } + } else { + std::iota(param_1.begin(), param_1.end(), T{0}); + std::iota(param_2.begin(), param_2.end(), T{32}); + } + + flatbuffers::FlatBufferBuilder builder; + std::vector> params; + params.reserve(2); + + flatbuffers::Offset fbs_param_1, fbs_param_2; + auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); + lora::utils::SaveLoraParameter(builder, "param_1", static_cast(data_type), param_shape, + byte_span, fbs_param_1); + params.push_back(fbs_param_1); + + byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); + lora::utils::SaveLoraParameter(builder, "param_2", static_cast(data_type), param_shape, + byte_span, fbs_param_2); + params.push_back(fbs_param_2); + + auto fbs_params = builder.CreateVector(params); + auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, kAdapterVersion, kModelVersion, + fbs_params); + builder.Finish(fbs_adapter, lora::AdapterIdentifier()); + + std::vector result; + result.reserve(builder.GetSize()); + gsl::span buffer(builder.GetBufferPointer(), builder.GetSize()); + std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); + return result; } +}; - { - // Test memory map +template +struct TestDataType { + void operator()() const { + const auto test_params = GenerateTestParameters()(); lora::LoraAdapter lora_adapter; - lora_adapter.MemoryMap(file_path); + lora_adapter.Load(std::move(test_params)); verify_load(lora_adapter); } +}; + +} // namespace + +TEST(LoraAdapterTest, Load) { + // Test different data types + const auto data_types = gsl::make_span(lora::EnumValuesTensorDataType()); + for (size_t i = 1, size = data_types.size(); i < size; ++i) { + if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) + continue; + + utils::MLTypeCallDispatcher + disp(static_cast(data_types[i])); + disp.Invoke(); + } } } // namespace test diff 
--git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc deleted file mode 100644 index b185ba676cb73..0000000000000 --- a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "lora/lora_format_version.h" -#include "lora/lora_format_utils.h" -#include "gtest/gtest.h" - -#include - -namespace onnxruntime { -namespace test { - -constexpr const int kAdapterVersion = 1; -constexpr const int kModelVersion = 1; - - -EST(LoraFormatTest, CreateAdapter) { - // generate a random sequence of floats - // shape = {8, 4} - constexpr std::array shape = {8, 4}; - std::vector param_1(32); - std::iota(param_1.begin(), param_1.end(), 0.0f); - - std::vector param_2(32); - std::iota(param_2.begin(), param_2.end(), 33.0f); - - flatbuffers::FlatBufferBuilder builder; - std::vector> params; - params.reserve(2); - flatbuffers::Offset fbs_param_1, fbs_param_2; - auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); - lora::utils::SaveLoraParameter(builder, "param_1", lora::TensorDataType::FLOAT, shape, - byte_span, fbs_param_1); - params.push_back(fbs_param_1); - - byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); - lora::utils::SaveLoraParameter(builder, "param_2", lora::TensorDataType::FLOAT, shape, - byte_span, fbs_param_2); - params.push_back(fbs_param_2); - - auto fbs_params = builder.CreateVector(params); - auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, 1, 1, fbs_params); - builder.Finish(fbs_adapter, lora::AdapterIdentifier()); - - constexpr const char* const file_name = - "d:/dmitrism/downloads/generate-test-model/param_conversion/lora_unit_test_adapter.fb"; - std::ofstream file(file_name, std::ios::binary); - ASSERT_TRUE(file.is_open()); - - ASSERT_FALSE(file.write(reinterpret_cast(builder.GetBufferPointer()), 
builder.GetSize()).fail()); - ASSERT_FALSE(file.flush().fail()); - file.close(); -} - -} -} \ No newline at end of file diff --git a/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb b/onnxruntime/test/testdata/lora/lora_unit_test_adapter.fb deleted file mode 100644 index af696646efe6704d8568af6ec7a2cd7668906c4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 432 zcmaLSEl2}l7zgmjL-4|dg$09QVZmUSVlW8a#z2LI1%p9w1A{@~5QD*R(YR<_G#HG= zMdPAz(P*@tzw@JsaKC&0ydS*JlSRaS_i&&@yQ-=s_1ChTFsIIa`;Y2``b@o6BK0Ct zG8!~lWsMeX*4d!LHaqN+)8~KzLykD+gb`<)b3ws1H{4S85*?&peSG)y6Jtdv##iIZ z)2+y&sozv>5b^gM_04k6FZWS0WyXT&nvWJ8a)yj3D7j<8JyRZd;+YvQ%z0(O8}EE{ L6A%19$7Xy2-Xc2# From a7c0ddb86c9437724a8d6005ba83deaf72f54309 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 14:53:18 -0700 Subject: [PATCH 30/84] Fix name moving --- onnxruntime/lora/lora_adapters.cc | 7 ++++--- onnxruntime/lora/lora_adapters.h | 3 +-- onnxruntime/test/lora/lora_test.cc | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 45b27365a116f..90fae678f7681 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,8 +14,8 @@ namespace onnxruntime { namespace lora { -LoraAdapter::LoraParam::LoraParam(std::string name, OrtValue ort_value) noexcept - : name_(std::move(name)), ort_value_(std::move(ort_value)) {} +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value) noexcept + : ort_value_(std::move(ort_value)) {} void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); @@ -47,7 +47,8 @@ void LoraAdapter::InitializeParamsValues() { params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); - params_values.emplace(name, LoraParam(std::move(name), std::move(ort_value))); + LoraParam 
lora_param(std::move(ort_value)); + params_values.emplace(std::move(name), std::move(lora_param)); } params_values_.swap(params_values); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 45519e4081cd3..e90e06f526b9b 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -125,9 +125,8 @@ class LoraAdapter { /// struct LoraParam { LoraParam() = default; - LoraParam(std::string name, OrtValue parameter) noexcept; + explicit LoraParam(OrtValue parameter) noexcept; - std::string name_; OrtValue ort_value_; }; diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 9015043554e80..6393cbc697030 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -83,7 +83,7 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_EQ(kModelVersion, adapter.ModelVersion()); const auto param_num = adapter.GetParamNum(); - ASSERT_GE(param_num, 0U); + ASSERT_EQ(param_num, 2U); InlinedVector names; InlinedVector ort_values; From 321a92f77564a0ce230c0f715ef8ac5404f19bf9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 15:09:18 -0700 Subject: [PATCH 31/84] Add OrtAllocator parameter --- .../onnxruntime/core/session/onnxruntime_c_api.h | 7 +++++-- .../onnxruntime/core/session/onnxruntime_cxx_api.h | 4 +++- .../core/session/onnxruntime_cxx_inline.h | 5 +++-- onnxruntime/core/session/ort_apis.h | 4 ++-- onnxruntime/lora/lora_adapters.cc | 10 +++++++--- onnxruntime/lora/lora_adapters.h | 14 ++++++++++---- onnxruntime/lora/lora_format_utils.cc | 12 +++++++++++- onnxruntime/lora/lora_format_utils.h | 9 +++++++++ 8 files changed, 50 insertions(+), 15 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index fb6d461645253..9dde2991636f7 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ 
b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4680,16 +4680,19 @@ struct OrtApi { * the platform does not support memory mapping, in which case the file will be read into memory. * * \param[in] adapter_file_path Absolute file path to the adapter file. + * \param[in] allocator optional pointer to a device allocator. If specified + * data is copied to the device at some point before Run() is invoked, if nullptr data stays on CPU. + * The data would still be copied to device if required by the model at inference time. * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with * OrtApi::ReleaseLoraAdapter. */ - ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); + ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter */ ORT_CLASS_RELEASE(LoraAdapter); - /** \brief Set the active Lora Adapter for the run options * * The function sets the active Lora Adapter for the run options. The Lora Adapter must be created with diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index b8f61d2e3d22f..468317099cd09 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -743,7 +743,9 @@ struct LoraAdapter : detail::Base { /// /// The function attempts to load the adapter from the specified file /// \param absolute_adapter_path The absolute path to the Lora adapter - explicit LoraAdapter(const std::basic_string& absolute_adapter_path); + /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still + /// be copied to device if required by the model at inference time. 
+ explicit LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator); }; /** \brief RunOptions diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index f7290aa610ff4..805a2553a0530 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,8 +557,8 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } -inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path) { - ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), &p_)); +inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator) { + ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), allocator, &p_)); } inline RunOptions::RunOptions() { @@ -615,6 +615,7 @@ inline RunOptions& RunOptions::UnsetTerminate() { inline RunOptions& RunOptions::SetLoraAdapterActive(const LoraAdapter& adapter) { ThrowOnError(GetApi().RunOptionsSetActiveLoraAdapter(p_, adapter)); + return *this; } namespace detail { diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 8564985beb9f7..210ef607c053e 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -120,7 +120,6 @@ ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); - ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type, _Outptr_ OrtValue** out); @@ -526,7 +525,8 @@ ORT_API_STATUS_IMPL(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* 
ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); -ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _Outptr_ OrtLoraAdapter** out); +ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 90fae678f7681..b4c5aff90f03d 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,8 +14,12 @@ namespace onnxruntime { namespace lora { -LoraAdapter::LoraParam::LoraParam(OrtValue ort_value) noexcept - : ort_value_(std::move(ort_value)) {} +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped) noexcept + : ort_value_mapped_(std::move(ort_value_mapped)) {} + +LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept + : ort_value_mapped_(std::move(ort_value_mapped)), ort_value_device_(std::move(ort_value_device)) { +} void LoraAdapter::Load(const std::filesystem::path& file_path) { auto buffer = utils::LoadLoraAdapterBytes(file_path); @@ -65,7 +69,7 @@ size_t LoraAdapter::GetBufferSize() const { } // namespace lora } // namespace onnxruntime -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN auto lora_adapter = std::make_unique(); diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index e90e06f526b9b..865fbe3f21cf2 100644 --- a/onnxruntime/lora/lora_adapters.h +++ 
b/onnxruntime/lora/lora_adapters.h @@ -5,8 +5,9 @@ #include "core/common/common.h" #include "core/common/inlined_containers.h" -#include "core/platform/env.h" +#include "core/framework/allocator.h" #include "core/framework/ort_value.h" +#include "core/platform/env.h" #include "lora/lora_format_utils.h" @@ -24,6 +25,8 @@ namespace lora { class LoraAdapter { public: LoraAdapter() = default; + explicit LoraAdapter(AllocatorPtr device_allocator) + : device_allocator_(std::move(device_allocator)) {} ~LoraAdapter() = default; LoraAdapter(const LoraAdapter&) = delete; LoraAdapter& operator=(const LoraAdapter&) = delete; @@ -96,7 +99,7 @@ class LoraAdapter { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = param.ort_value_; + *tensor_out = param.ort_value_mapped_; ++tensor_out; } } @@ -125,11 +128,14 @@ class LoraAdapter { /// struct LoraParam { LoraParam() = default; - explicit LoraParam(OrtValue parameter) noexcept; + explicit LoraParam(OrtValue ort_value_mapped) noexcept; + LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept; - OrtValue ort_value_; + OrtValue ort_value_mapped_; + OrtValue ort_value_device_; }; + AllocatorPtr device_allocator_; const Adapter* adapter_{nullptr}; InlinedHashMap params_values_; }; diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 75604dd62cf4d..378f2833904ca 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -102,7 +102,7 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter const auto data_type = param.data_type(); gsl::span shape_span(param.dims()->data(), param.dims()->size()); - OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); + static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); // const_cast is necessery due to 
Tensor class API @@ -114,6 +114,16 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter return std::make_pair(std::move(name), std::move(result)); } + +OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { + OrtValue result; + + const auto& tensor = ort_value_mapped.Get(); + Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator); + + return result; +} + } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index e5587fd730925..3c0bede4c5f2b 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -4,6 +4,7 @@ #pragma once #include "core/common/flatbuffers.h" +#include "core/framework/allocator.h" #include "core/platform/env.h" #include @@ -84,6 +85,14 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// std::pair CreateOrtValueOverLoraParameter(const Parameter& param); +/// +/// Allocates OrtValue on specified device and copies data there +/// +/// parameter on CPU +/// supplied device allocator +/// +OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); + } // namespace utils } // namespace lora } // namespace onnxruntime From 436d61cc94f46a456b909bfb2eddd301c656aedc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 16:13:58 -0700 Subject: [PATCH 32/84] Make Run() calls Lora aware --- .../onnxruntime/core/framework/run_options.h | 4 +- .../core/session/onnxruntime_c_api.h | 1 + .../core/session/onnxruntime_cxx_api.h | 6 ++ onnxruntime/core/session/onnxruntime_c_api.cc | 61 ++++++++++++++++--- onnxruntime/lora/lora_adapters.h | 8 +-- onnxruntime/test/lora/lora_test.cc | 10 +-- 6 files changed, 72 insertions(+), 18 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index 
aa741af0f1643..ffe5c61f506c0 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -5,6 +5,8 @@ #include #include + +#include "core/common/inlined_containers_fwd.h" #include "core/session/onnxruntime_c_api.h" #include "core/framework/config_options.h" @@ -46,7 +48,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - std::vector active_adapters_; + onnxruntime::InlinedVector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 9dde2991636f7..9b9ac819efb18 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4700,6 +4700,7 @@ struct OrtApi { * The instance of the OrtRunOptions will then can be used to customize the OrtSession::Run() calls. * More than one OrtLoraAdapter can be set active at the same time. Lora Parameters that belong to difference * Lora adapters that will be active at the same time must not overlap. + * This setting does not affect RunWithBinding. * * \param[in] options OrtRunOptions instance * \param[in] adapter OrtLoraAdapter instance diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 468317099cd09..d22051ded78ef 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -779,6 +779,12 @@ struct RunOptions : detail::Base { */ RunOptions& UnsetTerminate(); + /** \brief Designates the argument as an active adapter for the session Run() calls. + * The setting does not affect RunWithBinding() calls. 
+ * + * Wraps OrtApi::RunOptionsSetLoraAdapterActive + * \param adapter The LoraAdapter to be used as the active adapter + */ RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); }; diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 96f2ee1e14ee1..be39632dc270e 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -39,6 +39,8 @@ #include "core/platform/ort_mutex.h" #include "core/common/string_helper.h" +#include "lora/lora_adapters.h" + #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" #include "core/providers/cuda/cuda_execution_provider_info.h" @@ -813,6 +815,37 @@ ORT_API_STATUS_IMPL(OrtApis::CreateSessionFromArray, _In_ const OrtEnv* env, _In API_IMPL_END } +namespace { +// Checks if there are active lora adapters and adjusts input spans. +void CheckAndAdjustForLora(const OrtRunOptions* run_options, + InlinedVector& input_names_with_lora, + InlinedVector input_with_lora, + gsl::span& input_names, + gsl::span& inputs) { + if (!run_options->active_adapters_.empty()) { + size_t total_lora_params = 0; + for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + total_lora_params += ad->GetParamNum(); + } + + input_names_with_lora.reserve(input_names.size() + total_lora_params); + input_with_lora.reserve(inputs.size() + total_lora_params); + std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); + std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); + + // XXX: Currently only on CPU. 
+ for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + ad->OutputLoadedAdaptersParameters(std::back_inserter(input_names_with_lora), + std::back_inserter(input_with_lora)); + } + + input_names = gsl::make_span(input_names_with_lora); + inputs = gsl::make_span(input_with_lora); + } +} + +} // namespace + ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, _In_reads_(input_len) const char* const* input_names, _In_reads_(input_len) const OrtValue* const* input, size_t input_len, @@ -821,19 +854,26 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); - gsl::span input_names_span(input_names, input_len); - gsl::span input_span(input, input_len); - gsl::span output_name_span(output_names, output_names_len); - gsl::span output_span(output, output_names_len); + auto input_names_span = gsl::make_span(input_names, input_len); + auto input_span = gsl::make_span(input, input_len); + auto output_name_span = gsl::make_span(output_names, output_names_len); + auto output_span = gsl::make_span(output, output_names_len); Status status; if (run_options) { + + InlinedVector input_names_with_lora; + InlinedVector input_with_lora; + + CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); + status = session->Run(*run_options, input_names_span, input_span, output_name_span, output_span); } else { + const RunOptions default_run_options; status = session->Run(default_run_options, input_names_span, @@ -854,10 +894,15 @@ ORT_API_STATUS_IMPL(OrtApis::RunAsync, _Inout_ OrtSession* sess, _In_opt_ const API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); - gsl::span input_names_span(input_names, input_len); - gsl::span input_span(input, input_len); - gsl::span output_name_span(output_names, output_names_len); - gsl::span 
output_span(output, output_names_len); + auto input_names_span = gsl::make_span(input_names, input_len); + auto input_span = gsl::make_span(input, input_len); + auto output_name_span = gsl::make_span(output_names, output_names_len); + auto output_span = gsl::make_span(output, output_names_len); + + InlinedVector input_names_with_lora; + InlinedVector input_with_lora; + + CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); return ToOrtStatus(session->RunAsync(run_options, input_names_span, diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 865fbe3f21cf2..fa9aeaba10232 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -86,20 +86,20 @@ class LoraAdapter { } /// - /// Outputs Lora Parameters, their names and values + /// Outputs Lora Parameters on CPU, their names and values /// into the supplied output iterators. /// /// /// /// output iterator that accepts const char* - /// output iterator that accepts OrtValue + /// output iterator that accepts const OrtValue* template - void OutputAdaptersParameters(NamesOutputIter names_out, + void OutputLoadedAdaptersParameters(NamesOutputIter names_out, TensorOutputIter tensor_out) const { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = param.ort_value_mapped_; + *tensor_out = ¶m.ort_value_mapped_; ++tensor_out; } } diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 6393cbc697030..c4cf617960280 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -86,21 +86,21 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_EQ(param_num, 2U); InlinedVector names; - InlinedVector ort_values; + InlinedVector ort_values; names.reserve(param_num); ort_values.reserve(param_num); - adapter.OutputAdaptersParameters(std::back_inserter(names), 
std::back_inserter(ort_values)); + adapter.OutputLoadedAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); ASSERT_EQ(param_num, names.size()); ASSERT_EQ(param_num, ort_values.size()); for (size_t i = 0; i < param_num; ++i) { const auto& name = names[i]; - const auto& ort_value = ort_values[i]; + const auto* ort_value = ort_values[i]; ASSERT_TRUE(name != nullptr); - ASSERT_TRUE(ort_value.IsTensor()); + ASSERT_TRUE(ort_value->IsTensor()); - const auto& tensor = ort_value.Get(); + const auto& tensor = ort_value->Get(); ASSERT_NE(tensor.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); const auto shape = tensor.Shape().GetDims(); From 1584a1c28b34ed21509532050eed75089ad5c1aa Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 5 Sep 2024 16:37:07 -0700 Subject: [PATCH 33/84] Add format builder --- onnxruntime/lora/lora_format_utils.cc | 29 +++++++++++++++++++ onnxruntime/lora/lora_format_utils.h | 40 +++++++++++++++++++++++++++ onnxruntime/test/lora/lora_test.cc | 34 ++++++----------------- 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc index 378f2833904ca..9a4c1ce6f2415 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/lora_format_utils.cc @@ -124,6 +124,35 @@ OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const Allocato return result; } +void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type, + gsl::span shape, gsl::span data) { + flatbuffers::Offset fbs_param; + SaveLoraParameter(builder_, name, data_type, shape, data, fbs_param); + params_.push_back(fbs_param); +} + +std::vector AdapterFormatBuilder::Finish(int adapter_version, int model_version) { + FinishImpl(adapter_version, model_version); + + std::vector result; + result.reserve(builder_.GetSize()); + gsl::span buffer(builder_.GetBufferPointer(), builder_.GetSize()); + std::copy(buffer.begin(), 
buffer.end(), std::back_inserter(result)); + return result; +} + +gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int model_version) { + FinishImpl(adapter_version, model_version); + return gsl::make_span(builder_.GetBufferPointer(), builder_.GetSize()); +} + +void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { + auto fbs_params = builder_.CreateVector(params_); + auto fbs_adapter = lora::CreateAdapter(builder_, lora::kLoraFormatVersion, adapter_version, + model_version, fbs_params); + builder_.Finish(fbs_adapter, lora::AdapterIdentifier()); +} + } // namespace utils } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h index 3c0bede4c5f2b..e7e341945f2ca 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/lora_format_utils.h @@ -23,6 +23,46 @@ namespace onnxruntime { namespace lora { namespace utils { +/// +/// Helper class to serialize Lora adapter +/// +class AdapterFormatBuilder { + public: + AdapterFormatBuilder() = default; + + /// + /// Appends parameter tensor to the adapter builder + /// + /// parameter name + /// + /// + /// + void AddParameter(const std::string& name, lora::TensorDataType data_type, + gsl::span shape, gsl::span data); + + /// + /// Finishes serialization and returns a serialized byte vector + /// + /// + /// + /// + std::vector Finish(int adapter_version, int model_version); + + /// + /// Finishes serialization and returns a span to internal buffer. 
+ /// + /// + /// + /// + gsl::span FinishWithSpan(int adapter_version, int model_version); + + private: + void FinishImpl(int adapter_version, int model_version); + + flatbuffers::FlatBufferBuilder builder_; + std::vector> params_; +}; + /// /// /// diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index c4cf617960280..65a49865fed2d 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -150,31 +150,13 @@ struct GenerateTestParameters { std::iota(param_2.begin(), param_2.end(), T{32}); } - flatbuffers::FlatBufferBuilder builder; - std::vector> params; - params.reserve(2); - - flatbuffers::Offset fbs_param_1, fbs_param_2; - auto byte_span = ReinterpretAsSpan(gsl::make_span(param_1)); - lora::utils::SaveLoraParameter(builder, "param_1", static_cast(data_type), param_shape, - byte_span, fbs_param_1); - params.push_back(fbs_param_1); - - byte_span = ReinterpretAsSpan(gsl::make_span(param_2)); - lora::utils::SaveLoraParameter(builder, "param_2", static_cast(data_type), param_shape, - byte_span, fbs_param_2); - params.push_back(fbs_param_2); - - auto fbs_params = builder.CreateVector(params); - auto fbs_adapter = lora::CreateAdapter(builder, lora::kLoraFormatVersion, kAdapterVersion, kModelVersion, - fbs_params); - builder.Finish(fbs_adapter, lora::AdapterIdentifier()); - - std::vector result; - result.reserve(builder.GetSize()); - gsl::span buffer(builder.GetBufferPointer(), builder.GetSize()); - std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); - return result; + lora::utils::AdapterFormatBuilder adapter_builder; + adapter_builder.AddParameter("param_1", static_cast(data_type), + param_shape, ReinterpretAsSpan(gsl::make_span(param_1))); + adapter_builder.AddParameter("param_2", static_cast(data_type), + param_shape, ReinterpretAsSpan(gsl::make_span(param_2))); + + return adapter_builder.Finish(kAdapterVersion, kModelVersion); } }; @@ -194,7 +176,7 @@ TEST(LoraAdapterTest, Load) 
{ // Test different data types const auto data_types = gsl::make_span(lora::EnumValuesTensorDataType()); for (size_t i = 1, size = data_types.size(); i < size; ++i) { - if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) + if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) continue; utils::MLTypeCallDispatcher Date: Mon, 9 Sep 2024 10:05:55 -0700 Subject: [PATCH 34/84] Start Python impl --- onnxruntime/python/onnxruntime_pybind_lora.cc | 104 ++++++++++++++++++ .../python/onnxruntime_pybind_mlvalue.cc | 6 +- onnxruntime/python/onnxruntime_pybind_state.h | 1 + 3 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 onnxruntime/python/onnxruntime_pybind_lora.cc diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc new file mode 100644 index 0000000000000..5339bdf257dca --- /dev/null +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "python/onnxruntime_pybind_exceptions.h" +#include "python/onnxruntime_pybind_mlvalue.h" +#include "python/onnxruntime_pybind_state_common.h" + +#define NO_IMPORT_ARRAY +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API +#include "python/numpy_helper.h" + +#include "core/graph/onnx_protobuf.h" + +#include "core/framework/ort_value.h" +#include "core/framework/tensor.h" + +#include "lora/lora_format_utils.h" + +namespace onnxruntime { +namespace python { + +namespace py = pybind11; + +namespace { + +// Check if the numpy dtype descr property has any of the known types +// that is not supported natively by numpy arrays +std::optional GetDescrPropertyString(const py::dtype& arr_dtype) { + std::string custom_type; + try { + if (py::hasattr(arr_dtype, "descr")) { + auto descr = py::getattr(arr_dtype, "descr").cast(); + if (descr.size() > 0) { + auto item = descr[0].cast(); + if (item.size() > 0) { + custom_type = item[0].cast(); + } + } + } + } catch (const py::cast_error&) { + // Ignore the exception + PyErr_Clear(); + return {}; + } + return custom_type; +} +} // namespace + +void AddLoraMethods(pybind11::module& m) { + m.def( + "export_lora_parameters", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& lora_parameters) { + std::ofstream file(file_name, std::ios::binary); + if (file.fail()) { + ORT_THROW("Failed to open file:", file_name, " for writing."); + } + + lora::utils::AdapterFormatBuilder format_builder; + for (const auto& [n, arr] : lora_parameters) { + const std::string param_name = py::str(n); + py::array np_array = arr.cast(); + + py::dtype arr_dtype = np_array.dtype(); + + // This is the element type as supported by numpy, + // however, we can have bfloat16 and float8 types custome types defined. 
+ auto ml_element_type = NumpyTypeToOnnxRuntimeTensorType(arr_dtype.num()); + auto onnx_element_type = static_cast( + ml_element_type->AsPrimitiveDataType()->GetDataType()); + + if (!ONNX_NAMESPACE::TensorProto_DataType_IsValid(onnx_element_type)) { + ORT_THROW("Unsupported tensor ONNX element type: ", onnx_element_type); + } + + // Adjust for custom ONNX types + // see https://github.com/onnx/onnx/blob/main/onnx/_custom_element_types.py + switch (onnx_element_type) { + // Check if this really means BFloat16 as numpy custom types are conveyed + // by means of special annotations. + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { + auto custom_type = GetDescrPropertyString(arr_dtype); + if (custom_type.has_value()) { + // onnx_element_type = map string to type + } + break; + } + + // Check if this really means one of the float8 types + case ONNX_NAMESPACE::TensorProto_DataType_INT8: { + auto custom_type = GetDescrPropertyString(arr_dtype); + if (custom_type.has_value()) { + // onnx_element_type = map string to type + } + break; + } + default: + break; + }; + } + }, + "Save lora adapter parameters into a lora file format. 
"); +} + +} // namespace python +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index 8fdac257297c1..010039e2e8417 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -87,15 +87,13 @@ static TensorShape GetArrayShape(PyArrayObject* pyObject) { const int ndim = PyArray_NDIM(pyObject); const npy_intp* npy_dims = PyArray_DIMS(pyObject); auto span = gsl::make_span(npy_dims, ndim); - std::vector dims(span.begin(), span.end()); - TensorShape shape(std::move(dims)); + TensorShape shape(span); return shape; } TensorShape GetShape(const py::array& arr) { auto span = gsl::make_span(arr.shape(), arr.ndim()); - std::vector dims(span.begin(), span.end()); - TensorShape shape(std::move(dims)); + TensorShape shape(span); return shape; } diff --git a/onnxruntime/python/onnxruntime_pybind_state.h b/onnxruntime/python/onnxruntime_pybind_state.h index 47cde0d4cf193..fc9ef83d7a0d3 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.h +++ b/onnxruntime/python/onnxruntime_pybind_state.h @@ -9,6 +9,7 @@ namespace python { void addGlobalMethods(py::module& m, Environment& env); void addObjectMethods(py::module& m, Environment& env); void addOrtValueMethods(pybind11::module& m); +void AddLoraMethods(pybind11::module& m); } // namespace python } // namespace onnxruntime From 96fddbed84fa28d005b0148ac716e59ee5e5aeb7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 9 Sep 2024 18:47:30 -0700 Subject: [PATCH 35/84] Add Python layer --- .../onnxruntime/core/framework/run_options.h | 4 +- onnxruntime/core/framework/run_options.cc | 2 +- onnxruntime/core/session/onnxruntime_c_api.cc | 7 +- onnxruntime/lora/lora_adapters.cc | 22 +- onnxruntime/lora/lora_adapters.h | 64 +++-- onnxruntime/python/onnxruntime_pybind_lora.cc | 241 ++++++++++++++++-- onnxruntime/python/onnxruntime_pybind_state.h | 2 +- 
onnxruntime/test/lora/lora_test.cc | 4 +- 8 files changed, 274 insertions(+), 72 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index ffe5c61f506c0..58e566fcd7166 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -12,7 +12,7 @@ namespace onnxruntime { namespace lora { -class LoraAdapter; +class LoadedAdapter; } } // namespace onnxruntime @@ -48,7 +48,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - onnxruntime::InlinedVector active_adapters_; + onnxruntime::InlinedVector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index b0ea7f7c9d843..c061204b6e4ee 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -67,7 +67,7 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, const _In_ OrtLoraAdapter* adapter) { API_IMPL_BEGIN - auto* lora_adapter = reinterpret_cast(adapter); + auto* lora_adapter = reinterpret_cast(adapter); options->active_adapters_.push_back(lora_adapter); return nullptr; API_IMPL_END diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index be39632dc270e..85bad75e36141 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -824,7 +824,7 @@ void CheckAndAdjustForLora(const OrtRunOptions* run_options, gsl::span& inputs) { if (!run_options->active_adapters_.empty()) { size_t total_lora_params = 0; - for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + for (const lora::LoadedAdapter* ad : 
run_options->active_adapters_) { total_lora_params += ad->GetParamNum(); } @@ -833,8 +833,7 @@ void CheckAndAdjustForLora(const OrtRunOptions* run_options, std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); - // XXX: Currently only on CPU. - for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + for (const lora::LoadedAdapter* ad : run_options->active_adapters_) { ad->OutputLoadedAdaptersParameters(std::back_inserter(input_names_with_lora), std::back_inserter(input_with_lora)); } @@ -861,7 +860,6 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu Status status; if (run_options) { - InlinedVector input_names_with_lora; InlinedVector input_with_lora; @@ -873,7 +871,6 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu output_name_span, output_span); } else { - const RunOptions default_run_options; status = session->Run(default_run_options, input_names_span, diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index b4c5aff90f03d..454ec3ad2b6c2 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -14,25 +14,25 @@ namespace onnxruntime { namespace lora { -LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped) noexcept +LoadedAdapter::Param::Param(OrtValue ort_value_mapped) noexcept : ort_value_mapped_(std::move(ort_value_mapped)) {} -LoraAdapter::LoraParam::LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept +LoadedAdapter::Param::Param(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept : ort_value_mapped_(std::move(ort_value_mapped)), ort_value_device_(std::move(ort_value_device)) { } -void LoraAdapter::Load(const std::filesystem::path& file_path) { +void LoadedAdapter::Load(const std::filesystem::path& file_path) { auto buffer = 
utils::LoadLoraAdapterBytes(file_path); Load(std::move(buffer)); } -void LoraAdapter::Load(std::vector buffer) { +void LoadedAdapter::Load(std::vector buffer) { adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); InitializeParamsValues(); } -void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { +void LoadedAdapter::MemoryMap(const std::filesystem::path& file_path) { auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); @@ -41,23 +41,23 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { InitializeParamsValues(); } -void LoraAdapter::InitializeParamsValues() { +void LoadedAdapter::InitializeParamsValues() { if (adapter_ == nullptr) { ORT_THROW("Adapter is not loaded yet."); } const auto* params = adapter_->parameters(); - InlinedHashMap params_values; + InlinedHashMap params_values; params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); - LoraParam lora_param(std::move(ort_value)); + Param lora_param(std::move(ort_value)); params_values.emplace(std::move(name), std::move(lora_param)); } params_values_.swap(params_values); } -size_t LoraAdapter::GetBufferSize() const { +size_t LoadedAdapter::GetBufferSize() const { if (std::holds_alternative(buffer_)) { return std::get<1>(buffer_).file_size_; } else if (std::holds_alternative(buffer_)) { @@ -72,7 +72,7 @@ size_t LoraAdapter::GetBufferSize() const { ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN - auto lora_adapter = std::make_unique(); + auto lora_adapter = std::make_unique(); // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) 
lora_adapter->Load(adapter_file_path); *adapter = reinterpret_cast(lora_adapter.release()); @@ -81,5 +81,5 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_pa } ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { - delete reinterpret_cast(adapter); + delete reinterpret_cast(adapter); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index fa9aeaba10232..3298031b73bd2 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -22,17 +22,45 @@ namespace lora { /// /// Container to hold and access Lora Parameters /// -class LoraAdapter { +class LoadedAdapter { public: - LoraAdapter() = default; - explicit LoraAdapter(AllocatorPtr device_allocator) + LoadedAdapter() = default; + explicit LoadedAdapter(AllocatorPtr device_allocator) : device_allocator_(std::move(device_allocator)) {} - ~LoraAdapter() = default; - LoraAdapter(const LoraAdapter&) = delete; - LoraAdapter& operator=(const LoraAdapter&) = delete; + ~LoadedAdapter() = default; + LoadedAdapter(const LoadedAdapter&) = delete; + LoadedAdapter& operator=(const LoadedAdapter&) = delete; - LoraAdapter(LoraAdapter&&) = default; - LoraAdapter& operator=(LoraAdapter&&) = default; + LoadedAdapter(LoadedAdapter&&) = default; + LoadedAdapter& operator=(LoadedAdapter&&) = default; + + /// + /// Represents a named lora parameter (tensor) + /// + class Param { + public: + Param() = default; + explicit Param(OrtValue ort_value_mapped) noexcept; + Param(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept; + + const OrtValue& GetMapped() const { + return ort_value_mapped_; + } + + private: + OrtValue ort_value_mapped_; + OrtValue ort_value_device_; + }; + + using param_iterator = InlinedHashMap::const_iterator; + + /// + /// Obtain a range of the iterators + /// + /// + std::pair GetParamIterators() const { + return std::make_pair(params_values_.cbegin(), params_values_.cend()); + } /// /// 
Load parameters into memory from an adapter file and validates its format. @@ -65,7 +93,7 @@ class LoraAdapter { /// Gets lora format version /// /// - int LoraFormatVersion() const noexcept { + int FormatVersion() const noexcept { return adapter_->format_version(); } @@ -95,11 +123,11 @@ class LoraAdapter { /// output iterator that accepts const OrtValue* template void OutputLoadedAdaptersParameters(NamesOutputIter names_out, - TensorOutputIter tensor_out) const { + TensorOutputIter tensor_out) const { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = ¶m.ort_value_mapped_; + *tensor_out = ¶m.GetMapped(); ++tensor_out; } } @@ -123,21 +151,9 @@ class LoraAdapter { std::variant buffer_; - /// - /// Represents a named lora parameter (tensor) - /// - struct LoraParam { - LoraParam() = default; - explicit LoraParam(OrtValue ort_value_mapped) noexcept; - LoraParam(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept; - - OrtValue ort_value_mapped_; - OrtValue ort_value_device_; - }; - AllocatorPtr device_allocator_; const Adapter* adapter_{nullptr}; - InlinedHashMap params_values_; + InlinedHashMap params_values_; }; } // namespace lora diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 5339bdf257dca..810f0196baaec 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -15,6 +15,12 @@ #include "core/framework/tensor.h" #include "lora/lora_format_utils.h" +#include "lora/lora_adapters.h" + +#include + +#include +#include namespace onnxruntime { namespace python { @@ -24,9 +30,18 @@ namespace py = pybind11; namespace { // Check if the numpy dtype descr property has any of the known types -// that is not supported natively by numpy arrays +// that is not supported natively by numpy arrays. 
+// For example: +// >>> bfloat16 = np.dtype((np.uint16, {"bfloat16": (np.uint16, 0)})) +// >>> print(bfloat16.descr) +// [('bfloat16', ' GetDescrPropertyString(const py::dtype& arr_dtype) { - std::string custom_type; + std::optional custom_type; try { if (py::hasattr(arr_dtype, "descr")) { auto descr = py::getattr(arr_dtype, "descr").cast(); @@ -40,29 +55,131 @@ std::optional GetDescrPropertyString(const py::dtype& arr_dtype) { } catch (const py::cast_error&) { // Ignore the exception PyErr_Clear(); - return {}; } return custom_type; } + +// bfloat16 = np.dtype((np.uint16, {"bfloat16": (np.uint16, 0)})) +py::dtype ConstructCustomDtype(int32_t npy_type, const std::string& custom_type_tag) { + py::dtype first_arg(npy_type); + + py::dict second_arg; + second_arg[py::str(custom_type_tag)] = py::make_tuple(first_arg, 0); + auto tuple = py::make_tuple(std::move(first_arg), std::move(second_arg)); + + py::dtype result{py::dtype::from_args(tuple)}; + return result; +} + +// Get mapped OnnxDataType from numpy dtype descriptior +// float4e2m1 unsupported at the moment +std::optional GetOnnxDataTypeFromCustomPythonDescr(const std::string& descr) { + static const std::unordered_map dtype_descr = { + {"bfloat16", ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16}, + {"e4m3fn", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN}, + {"e4m3fnuz", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FNUZ}, + {"e5m2", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2}, + {"e5m2fnuz", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2FNUZ}, + {"int4", ONNX_NAMESPACE::TensorProto_DataType_INT4}, + {"uint4", ONNX_NAMESPACE::TensorProto_DataType_UINT4}, + }; + + auto hit = dtype_descr.find(descr); + if (hit == dtype_descr.end()) { + return std::nullopt; + } + + return hit->second; +} + +// If a custom type is discovered in numpy array we set the correct ONNX type. 
+int32_t AdjustOnnxTypeIfNeeded(const py::dtype& arr_dtype, int32_t base_type_from_array) { + auto descr = GetDescrPropertyString(arr_dtype); + if (descr.has_value()) { + auto adjusted_type = GetOnnxDataTypeFromCustomPythonDescr(*descr); + if (adjusted_type.has_value()) { + return *adjusted_type; + } + } + return base_type_from_array; +} + +std::optional FromOnnxTypeToNumpySupportedType(int32_t onnx_type) { + // Numpy supported types mapping + static std::unordered_map onnxtype_to_numpy{ + {ONNX_NAMESPACE::TensorProto_DataType_BOOL, NPY_BOOL}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT, NPY_FLOAT}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT16, NPY_FLOAT16}, + {ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, NPY_DOUBLE}, + {ONNX_NAMESPACE::TensorProto_DataType_INT8, NPY_INT8}, + {ONNX_NAMESPACE::TensorProto_DataType_UINT8, NPY_UINT8}, + {ONNX_NAMESPACE::TensorProto_DataType_INT16, NPY_INT16}, + {ONNX_NAMESPACE::TensorProto_DataType_UINT16, NPY_UINT16}, + {ONNX_NAMESPACE::TensorProto_DataType_INT32, NPY_INT}, + {ONNX_NAMESPACE::TensorProto_DataType_UINT32, NPY_UINT}, + {ONNX_NAMESPACE::TensorProto_DataType_INT64, NPY_LONGLONG}, + {ONNX_NAMESPACE::TensorProto_DataType_UINT64, NPY_ULONGLONG}, + {ONNX_NAMESPACE::TensorProto_DataType_STRING, NPY_STRING}, + }; + + auto hit = onnxtype_to_numpy.find(onnx_type); + if (hit == onnxtype_to_numpy.end()) + return std::nullopt; + + return hit->second; +} + +std::optional> GetCustomNumpyTypeFromOnnxType(int32_t onnx_data_type) { + static const std::unordered_map> onnxtype_to_custom_numpy_type = { + {ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16, {NPY_UINT16, "bfloat16"}}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN, {NPY_UINT8, "e4m3fn"}}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FNUZ, {NPY_UINT8, "e4m3fnuz"}}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2, {NPY_UINT8, "e5m2"}}, + {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2FNUZ, {NPY_UINT8, "e5m2fnuz"}}, + {ONNX_NAMESPACE::TensorProto_DataType_INT4, 
{NPY_INT8, "int4"}}, + {ONNX_NAMESPACE::TensorProto_DataType_UINT4, {NPY_UINT8, "uint4"}}}; + + auto hit = onnxtype_to_custom_numpy_type.find(onnx_data_type); + if (hit == onnxtype_to_custom_numpy_type.end()) { + return std::nullopt; + } + + return hit->second; +} + +py::dtype ConstructDType(int32_t onnx_type) { + // check if the type maps to onnx custom type + auto custom_type = GetCustomNumpyTypeFromOnnxType(onnx_type); + if (custom_type.has_value()) { + return ConstructCustomDtype(custom_type->first, custom_type->second); + } + + auto npy_type = FromOnnxTypeToNumpySupportedType(onnx_type); + if (npy_type.has_value()) { + return py::dtype(*npy_type); + } + ORT_THROW("Unsupported type detected:", onnx_type); +} + } // namespace -void AddLoraMethods(pybind11::module& m) { +void AddAdapterMethods(pybind11::module& m) { m.def( - "export_lora_parameters", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& lora_parameters) { + "export_adapter", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& adapter_parameters) { std::ofstream file(file_name, std::ios::binary); if (file.fail()) { ORT_THROW("Failed to open file:", file_name, " for writing."); } lora::utils::AdapterFormatBuilder format_builder; - for (const auto& [n, arr] : lora_parameters) { + for (const auto& [n, arr] : adapter_parameters) { const std::string param_name = py::str(n); py::array np_array = arr.cast(); py::dtype arr_dtype = np_array.dtype(); // This is the element type as supported by numpy, - // however, we can have bfloat16 and float8 types custome types defined. + // however, we can have bfloat16 and float8 custom types defined. 
auto ml_element_type = NumpyTypeToOnnxRuntimeTensorType(arr_dtype.num()); auto onnx_element_type = static_cast( ml_element_type->AsPrimitiveDataType()->GetDataType()); @@ -71,33 +188,105 @@ void AddLoraMethods(pybind11::module& m) { ORT_THROW("Unsupported tensor ONNX element type: ", onnx_element_type); } - // Adjust for custom ONNX types - // see https://github.com/onnx/onnx/blob/main/onnx/_custom_element_types.py switch (onnx_element_type) { - // Check if this really means BFloat16 as numpy custom types are conveyed - // by means of special annotations. - case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { - auto custom_type = GetDescrPropertyString(arr_dtype); - if (custom_type.has_value()) { - // onnx_element_type = map string to type - } - break; - } - - // Check if this really means one of the float8 types - case ONNX_NAMESPACE::TensorProto_DataType_INT8: { - auto custom_type = GetDescrPropertyString(arr_dtype); - if (custom_type.has_value()) { - // onnx_element_type = map string to type - } + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: { + onnx_element_type = + static_cast(AdjustOnnxTypeIfNeeded(arr_dtype, + onnx_element_type)); break; } default: break; }; + + gsl::span shape_span{reinterpret_cast(np_array.shape()), + static_cast(np_array.ndim())}; + gsl::span data_span{reinterpret_cast(np_array.data()), + static_cast(np_array.nbytes())}; + + format_builder.AddParameter(param_name, static_cast(onnx_element_type), + shape_span, data_span); + } + auto format_span = format_builder.FinishWithSpan(adapter_version, model_version); + if (file.write(reinterpret_cast(format_span.data()), format_span.size()).fail()) { + ORT_THROW("Failed to write :", std::to_string(format_span.size()), " bytes to ", file_name); + } + + if (file.flush().fail()) { + ORT_THROW("Failed to flush :", file_name, " on close"); } }, - "Save lora adapter parameters into a lora file 
format. "); + "Save adapter parameters into a lora file format. "); + + class PyAdapter { + public: + PyAdapter(int format_version, int adapter_version, + int model_version, py::dict params) : format_version_(format_version), adapter_version_(adapter_version), model_version_(model_version), parameters_(std::move(params)) {} + + int FormatVersion() const noexcept { + return format_version_; + } + + int AdapterVersion() const noexcept { + return adapter_version_; + } + + int ModelVersion() const noexcept { + return model_version_; + } + + py::dict GetParameters() const noexcept { + return parameters_; + } + + private: + int format_version_; + int adapter_version_; + int model_version_; + py::dict parameters_; + }; + + py::class_ adapter_binding(m, "LoraAdapter"); + adapter_binding.def(py::init()); + adapter_binding.def("get_format_version", [](PyAdapter* py_adapter) -> int { + return py_adapter->FormatVersion(); + }); + adapter_binding.def("get_adapter_version", [](PyAdapter* py_adapter) -> int { + return py_adapter->AdapterVersion(); + }); + adapter_binding.def("get_model_version", [](PyAdapter* py_adapter) -> int { + return py_adapter->ModelVersion(); + }); + adapter_binding.def("get_arameters", [](PyAdapter* py_adapter) -> py::dict { + return py_adapter->GetParameters(); + }); + + m.def("read_adapter", [](const std::string& file_name) -> std::unique_ptr { + lora::LoadedAdapter adapter; + adapter.MemoryMap(file_name); + + auto [begin, end] = adapter.GetParamIterators(); + py::dict params; + for (; begin != end; ++begin) { + const auto& [name, param] = *begin; + const auto& tensor = param.GetMapped().Get(); + + const auto onnx_type = tensor.GetElementType(); + const auto size_bytes = tensor.SizeInBytes(); + + py::dtype dtype = ConstructDType(onnx_type); + py::array npy_array(dtype, tensor.Shape().GetDims()); + ORT_ENFORCE(npy_array.size(), tensor.Shape().Size()); + memcpy_s(npy_array.mutable_data(), size_bytes, tensor.DataRaw(), size_bytes); + params[py::str(name)] 
= std::move(npy_array); + } + + auto py_adapter = std::make_unique(adapter.FormatVersion(), adapter.AdapterVersion(), + adapter.ModelVersion(), std::move(params)); + return py_adapter; + }); } } // namespace python diff --git a/onnxruntime/python/onnxruntime_pybind_state.h b/onnxruntime/python/onnxruntime_pybind_state.h index fc9ef83d7a0d3..d3cf40609d17b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.h +++ b/onnxruntime/python/onnxruntime_pybind_state.h @@ -9,7 +9,7 @@ namespace python { void addGlobalMethods(py::module& m, Environment& env); void addObjectMethods(py::module& m, Environment& env); void addOrtValueMethods(pybind11::module& m); -void AddLoraMethods(pybind11::module& m); +void AddAdapterMethods(pybind11::module& m); } // namespace python } // namespace onnxruntime diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 65a49865fed2d..62261df4dadb3 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -78,7 +78,7 @@ struct ReadAndValidateData { } }; -auto verify_load = [](const lora::LoraAdapter& adapter) { +auto verify_load = [](const lora::LoadedAdapter& adapter) { ASSERT_EQ(kAdapterVersion, adapter.AdapterVersion()); ASSERT_EQ(kModelVersion, adapter.ModelVersion()); @@ -164,7 +164,7 @@ template struct TestDataType { void operator()() const { const auto test_params = GenerateTestParameters()(); - lora::LoraAdapter lora_adapter; + lora::LoadedAdapter lora_adapter; lora_adapter.Load(std::move(test_params)); verify_load(lora_adapter); } From d58c4504260c5a229b019cbf3cc90bea5affb121 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 9 Sep 2024 19:15:52 -0700 Subject: [PATCH 36/84] Rename namespace to onnxruntime.adapters --- .../{lora_format => adapter_format}/README.md | 0 .../adapter_schema.fbs} | 2 +- .../adapter_schema.fbs.h} | 52 +++++++++---------- .../compile_schema.py | 2 +- ...ormat_utils.cc => adapter_format_utils.cc} | 18 +++---- ..._format_utils.h => 
adapter_format_utils.h} | 12 ++--- onnxruntime/lora/adapter_format_version.h | 33 ++++++++++++ onnxruntime/lora/lora_adapters.h | 4 +- onnxruntime/lora/lora_format_version.h | 33 ------------ onnxruntime/python/onnxruntime_pybind_lora.cc | 6 +-- onnxruntime/test/lora/lora_test.cc | 12 ++--- 11 files changed, 87 insertions(+), 87 deletions(-) rename onnxruntime/lora/{lora_format => adapter_format}/README.md (100%) rename onnxruntime/lora/{lora_format/lora_schema.fbs => adapter_format/adapter_schema.fbs} (96%) rename onnxruntime/lora/{lora_format/lora_schema.fbs.h => adapter_format/adapter_schema.fbs.h} (83%) rename onnxruntime/lora/{lora_format => adapter_format}/compile_schema.py (96%) rename onnxruntime/lora/{lora_format_utils.cc => adapter_format_utils.cc} (92%) rename onnxruntime/lora/{lora_format_utils.h => adapter_format_utils.h} (92%) create mode 100644 onnxruntime/lora/adapter_format_version.h delete mode 100644 onnxruntime/lora/lora_format_version.h diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/adapter_format/README.md similarity index 100% rename from onnxruntime/lora/lora_format/README.md rename to onnxruntime/lora/adapter_format/README.md diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/adapter_format/adapter_schema.fbs similarity index 96% rename from onnxruntime/lora/lora_format/lora_schema.fbs rename to onnxruntime/lora/adapter_format/adapter_schema.fbs index 37e8195dab6f2..cb0e4415d1555 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ b/onnxruntime/lora/adapter_format/adapter_schema.fbs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-namespace onnxruntime.lora; +namespace onnxruntime.adapters; // Tensor enum TensorDataType : int32 { diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h similarity index 83% rename from onnxruntime/lora/lora_format/lora_schema.fbs.h rename to onnxruntime/lora/adapter_format/adapter_schema.fbs.h index a75082af811fc..b361a4e35f465 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h @@ -1,8 +1,8 @@ // automatically generated by the FlatBuffers compiler, do not modify -#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ -#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ +#ifndef FLATBUFFERS_GENERATED_ADAPTERSCHEMA_ONNXRUNTIME_ADAPTERS_H_ +#define FLATBUFFERS_GENERATED_ADAPTERSCHEMA_ONNXRUNTIME_ADAPTERS_H_ #include "flatbuffers/flatbuffers.h" @@ -14,7 +14,7 @@ static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && "Non-compatible flatbuffers version included"); namespace onnxruntime { -namespace lora { +namespace adapters { struct Parameter; struct ParameterBuilder; @@ -123,8 +123,8 @@ struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { const ::flatbuffers::Vector *dims() const { return GetPointer *>(VT_DIMS); } - onnxruntime::lora::TensorDataType data_type() const { - return static_cast(GetField(VT_DATA_TYPE, 0)); + onnxruntime::adapters::TensorDataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); } const ::flatbuffers::Vector *raw_data() const { return GetPointer *>(VT_RAW_DATA); @@ -152,7 +152,7 @@ struct ParameterBuilder { void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { fbb_.AddOffset(Parameter::VT_DIMS, dims); } - void add_data_type(onnxruntime::lora::TensorDataType data_type) { + void add_data_type(onnxruntime::adapters::TensorDataType data_type) { fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); } void 
add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { @@ -173,7 +173,7 @@ inline ::flatbuffers::Offset CreateParameter( ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, + onnxruntime::adapters::TensorDataType data_type = onnxruntime::adapters::TensorDataType::UNDEFINED, ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { ParameterBuilder builder_(_fbb); builder_.add_raw_data(raw_data); @@ -187,13 +187,13 @@ inline ::flatbuffers::Offset CreateParameterDirect( ::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, const std::vector *dims = nullptr, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, + onnxruntime::adapters::TensorDataType data_type = onnxruntime::adapters::TensorDataType::UNDEFINED, const std::vector *raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; if (raw_data) { _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); } auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; - return onnxruntime::lora::CreateParameter( + return onnxruntime::adapters::CreateParameter( _fbb, name__, dims__, @@ -218,8 +218,8 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { int32_t model_version() const { return GetField(VT_MODEL_VERSION, 0); } - const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { - return GetPointer> *>(VT_PARAMETERS); + const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { + return GetPointer> *>(VT_PARAMETERS); } bool Verify(::flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -246,7 +246,7 @@ struct AdapterBuilder { void add_model_version(int32_t model_version) { fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); } - void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { + void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); } explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) @@ -265,7 +265,7 @@ inline ::flatbuffers::Offset CreateAdapter( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { AdapterBuilder builder_(_fbb); builder_.add_parameters(parameters); builder_.add_model_version(model_version); @@ -279,9 +279,9 @@ inline ::flatbuffers::Offset CreateAdapterDirect( int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - const std::vector<::flatbuffers::Offset> *parameters = nullptr) { - auto parameters__ = parameters ? 
_fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; - return onnxruntime::lora::CreateAdapter( + const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; + return onnxruntime::adapters::CreateAdapter( _fbb, format_version, adapter_version, @@ -289,12 +289,12 @@ inline ::flatbuffers::Offset CreateAdapterDirect( parameters__); } -inline const onnxruntime::lora::Adapter *GetAdapter(const void *buf) { - return ::flatbuffers::GetRoot(buf); +inline const onnxruntime::adapters::Adapter *GetAdapter(const void *buf) { + return ::flatbuffers::GetRoot(buf); } -inline const onnxruntime::lora::Adapter *GetSizePrefixedAdapter(const void *buf) { - return ::flatbuffers::GetSizePrefixedRoot(buf); +inline const onnxruntime::adapters::Adapter *GetSizePrefixedAdapter(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); } inline const char *AdapterIdentifier() { @@ -313,27 +313,27 @@ inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { inline bool VerifyAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(AdapterIdentifier()); + return verifier.VerifyBuffer(AdapterIdentifier()); } inline bool VerifySizePrefixedAdapterBuffer( ::flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); + return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); } inline void FinishAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.Finish(root, AdapterIdentifier()); } inline void FinishSizePrefixedAdapterBuffer( ::flatbuffers::FlatBufferBuilder &fbb, - ::flatbuffers::Offset root) { + ::flatbuffers::Offset root) { fbb.FinishSizePrefixed(root, AdapterIdentifier()); } -} // namespace lora +} // namespace adapters } // namespace onnxruntime -#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ +#endif // 
FLATBUFFERS_GENERATED_ADAPTERSCHEMA_ONNXRUNTIME_ADAPTERS_H_ diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/adapter_format/compile_schema.py similarity index 96% rename from onnxruntime/lora/lora_format/compile_schema.py rename to onnxruntime/lora/adapter_format/compile_schema.py index bee53885a2005..48090c2c2f7d0 100644 --- a/onnxruntime/lora/lora_format/compile_schema.py +++ b/onnxruntime/lora/adapter_format/compile_schema.py @@ -44,7 +44,7 @@ def main(): args = parser.parse_args() languages = args.languages if args.languages is not None else all_languages flatc = args.flatc.resolve(strict=True) - schema_path = SCRIPT_DIR / "lora_schema.fbs" + schema_path = SCRIPT_DIR / "adapter_schema.fbs" if "cpp" in languages: generate_cpp(flatc, schema_path) diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc similarity index 92% rename from onnxruntime/lora/lora_format_utils.cc rename to onnxruntime/lora/adapter_format_utils.cc index 9a4c1ce6f2415..1d6f3e3da98cb 100644 --- a/onnxruntime/lora/lora_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -1,8 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "lora_format_utils.h" -#include "lora_format_version.h" +#include "adapter_format_utils.h" +#include "adapter_format_version.h" #include "core/common/common.h" #include "core/common/span_utils.h" @@ -14,10 +14,10 @@ #include namespace onnxruntime { -namespace lora { +namespace adapters { namespace utils { -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) { +bool IsAdapterFormatModelBytes(const void* bytes, size_t num_bytes) { return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory AdapterBufferHasIdentifier(bytes); } @@ -65,7 +65,7 @@ std::pair MemoryMapAdapterFile(const std::filesyst } const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { - if (!IsLoraFormatModelBytes(bytes.data(), bytes.size())) { + if (!IsAdapterFormatModelBytes(bytes.data(), bytes.size())) { ORT_THROW("The buffer does not appear to be a valid lora parameter format"); } @@ -75,7 +75,7 @@ const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { } auto* adapter = GetAdapter(bytes.data()); - if (!IsLoraFormatVersionSupported(adapter->format_version())) { + if (!IsAdapterFormatVersionSupported(adapter->format_version())) { ORT_THROW("Unsupported lora format version"); } @@ -124,7 +124,7 @@ OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const Allocato return result; } -void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type, +void AdapterFormatBuilder::AddParameter(const std::string& name, TensorDataType data_type, gsl::span shape, gsl::span data) { flatbuffers::Offset fbs_param; SaveLoraParameter(builder_, name, data_type, shape, data, fbs_param); @@ -148,9 +148,9 @@ gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { auto fbs_params = builder_.CreateVector(params_); - auto fbs_adapter = lora::CreateAdapter(builder_, lora::kLoraFormatVersion, 
adapter_version, + auto fbs_adapter = CreateAdapter(builder_, kAdapterFormatVersion, adapter_version, model_version, fbs_params); - builder_.Finish(fbs_adapter, lora::AdapterIdentifier()); + builder_.Finish(fbs_adapter, AdapterIdentifier()); } } // namespace utils diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/adapter_format_utils.h similarity index 92% rename from onnxruntime/lora/lora_format_utils.h rename to onnxruntime/lora/adapter_format_utils.h index e7e341945f2ca..95263a0a3e128 100644 --- a/onnxruntime/lora/lora_format_utils.h +++ b/onnxruntime/lora/adapter_format_utils.h @@ -10,7 +10,7 @@ #include #include -#include "lora_format/lora_schema.fbs.h" +#include "adapter_format/adapter_schema.fbs.h" #include #include @@ -20,7 +20,7 @@ struct OrtValue; namespace onnxruntime { -namespace lora { +namespace adapters { namespace utils { /// @@ -37,7 +37,7 @@ class AdapterFormatBuilder { /// /// /// - void AddParameter(const std::string& name, lora::TensorDataType data_type, + void AddParameter(const std::string& name, adapters::TensorDataType data_type, gsl::span shape, gsl::span data); /// @@ -60,7 +60,7 @@ class AdapterFormatBuilder { void FinishImpl(int adapter_version, int model_version); flatbuffers::FlatBufferBuilder builder_; - std::vector> params_; + std::vector> params_; }; /// @@ -69,7 +69,7 @@ class AdapterFormatBuilder { /// /// /// -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes); +bool IsAdapterFormatModelBytes(const void* bytes, size_t num_bytes); // Will only create string in flatbuffers when has_string is true flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, @@ -110,7 +110,7 @@ const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes); /// /// output offset void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, - lora::TensorDataType data_type, + TensorDataType data_type, gsl::span shape, gsl::span data, flatbuffers::Offset& 
fbs_tensor);
diff --git a/onnxruntime/lora/adapter_format_version.h b/onnxruntime/lora/adapter_format_version.h
new file mode 100644
index 0000000000000..a636911e36087
--- /dev/null
+++ b/onnxruntime/lora/adapter_format_version.h
@@ -0,0 +1,33 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+
+namespace onnxruntime {
+namespace adapters {
+
+// The current model versions for saving lora parameters in flatbuffers
+// Once this version is updated, the kSupportedAdapterFormatVersions in IsAdapterFormatVersionSupported
+// below will also need to be updated.
+// See src/flatbuffers/schema/README.md for more details on versioning.
+// Version 1 - history begins
+constexpr const int kAdapterFormatVersion = 1;
+
+// Check if the given lora format version is supported in this build
+inline bool IsAdapterFormatVersionSupported(const int lora_format_version) {
+  // The lora format versions we will support in this build
+  // This may contain more versions than the kAdapterFormatVersion, based on the compatibilities
+  static constexpr std::array kSupportedAdapterFormatVersions{
+      kAdapterFormatVersion,
+  };
+
+  const auto it =
+      std::find(kSupportedAdapterFormatVersions.begin(), kSupportedAdapterFormatVersions.end(), lora_format_version);
+  return it != kSupportedAdapterFormatVersions.cend();
+}
+
+}  // namespace adapters
+}  // namespace onnxruntime
diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h
index 3298031b73bd2..47aaff09ac318 100644
--- a/onnxruntime/lora/lora_adapters.h
+++ b/onnxruntime/lora/lora_adapters.h
@@ -9,7 +9,7 @@
 #include "core/framework/ort_value.h"
 #include "core/platform/env.h"
 
-#include "lora/lora_format_utils.h"
+#include "lora/adapter_format_utils.h"
 
 #include <filesystem>
 #include <variant>
@@ -152,7 +152,7 @@ class LoadedAdapter {
   std::variant buffer_;
 
   AllocatorPtr device_allocator_;
-  const Adapter* adapter_{nullptr};
+  const adapters::Adapter*
adapter_{nullptr}; InlinedHashMap params_values_; }; diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h deleted file mode 100644 index 9c90a86b16382..0000000000000 --- a/onnxruntime/lora/lora_format_version.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include -#include - -namespace onnxruntime { -namespace lora { - -// The current model versions for saving lora parameters in flatbuffers -// Once this version is updated, the kSupportedLoraFormatVersions in IsGenAiLoraFormatModelBytes -// below will also need to be updated. -// See src/flatbuffers/schema/README.md for more details on versioning. -// Version 1 - history begins -constexpr const int kLoraFormatVersion = 1; - -// Check if the given lora format version is supported in this build -inline bool IsLoraFormatVersionSupported(const int lora_format_version) { - // The lora format versions we will support in this build - // This may contain more versions than the kLoraFormatVersion, based on the compatibilities - static constexpr std::array kSupportedLoraFormatVersions{ - kLoraFormatVersion, - }; - - const auto it = - std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version); - return it != kSupportedLoraFormatVersions.cend(); -} - -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 810f0196baaec..d843a8fbf03ab 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -14,7 +14,7 @@ #include "core/framework/ort_value.h" #include "core/framework/tensor.h" -#include "lora/lora_format_utils.h" +#include "lora/adapter_format_utils.h" #include "lora/lora_adapters.h" #include @@ -171,7 +171,7 @@ void AddAdapterMethods(pybind11::module& m) { ORT_THROW("Failed to 
open file:", file_name, " for writing."); } - lora::utils::AdapterFormatBuilder format_builder; + adapters::utils::AdapterFormatBuilder format_builder; for (const auto& [n, arr] : adapter_parameters) { const std::string param_name = py::str(n); py::array np_array = arr.cast(); @@ -206,7 +206,7 @@ void AddAdapterMethods(pybind11::module& m) { gsl::span data_span{reinterpret_cast(np_array.data()), static_cast(np_array.nbytes())}; - format_builder.AddParameter(param_name, static_cast(onnx_element_type), + format_builder.AddParameter(param_name, static_cast(onnx_element_type), shape_span, data_span); } auto format_span = format_builder.FinishWithSpan(adapter_version, model_version); diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 62261df4dadb3..13a18e33f01ba 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -6,8 +6,8 @@ #include "core/framework/to_tensor_proto_element_type.h" #include "lora/lora_adapters.h" -#include "lora/lora_format_version.h" -#include "lora/lora_format_utils.h" +#include "lora/adapter_format_version.h" +#include "lora/adapter_format_utils.h" #include "gtest/gtest.h" #include @@ -150,10 +150,10 @@ struct GenerateTestParameters { std::iota(param_2.begin(), param_2.end(), T{32}); } - lora::utils::AdapterFormatBuilder adapter_builder; - adapter_builder.AddParameter("param_1", static_cast(data_type), + adapters::utils::AdapterFormatBuilder adapter_builder; + adapter_builder.AddParameter("param_1", static_cast(data_type), param_shape, ReinterpretAsSpan(gsl::make_span(param_1))); - adapter_builder.AddParameter("param_2", static_cast(data_type), + adapter_builder.AddParameter("param_2", static_cast(data_type), param_shape, ReinterpretAsSpan(gsl::make_span(param_2))); return adapter_builder.Finish(kAdapterVersion, kModelVersion); @@ -174,7 +174,7 @@ struct TestDataType { TEST(LoraAdapterTest, Load) { // Test different data types - const auto data_types = 
gsl::make_span(lora::EnumValuesTensorDataType()); + const auto data_types = gsl::make_span(adapters::EnumValuesTensorDataType()); for (size_t i = 1, size = data_types.size(); i < size; ++i) { if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) continue; From 59805ac351aeef8fecf49c6fc0738ca329790d13 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 10 Sep 2024 14:13:32 -0700 Subject: [PATCH 37/84] Expose python level Adapter class --- .../onnxruntime/core/framework/run_options.h | 4 +- onnxruntime/core/framework/run_options.cc | 2 +- onnxruntime/core/session/onnxruntime_c_api.cc | 6 +- onnxruntime/lora/lora_adapters.cc | 30 +++---- onnxruntime/lora/lora_adapters.h | 18 ++--- .../onnxruntime_inference_collection.py | 37 +++++++++ onnxruntime/python/onnxruntime_pybind_lora.cc | 79 +++++++++---------- .../python/onnxruntime_pybind_state.cc | 1 + onnxruntime/python/onnxruntime_pybind_state.h | 2 +- .../python/onnxruntime_pybind_state_common.h | 2 + onnxruntime/test/lora/lora_test.cc | 6 +- .../test/python/onnxruntime_test_python.py | 10 +++ 12 files changed, 121 insertions(+), 76 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index 58e566fcd7166..ffe5c61f506c0 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -12,7 +12,7 @@ namespace onnxruntime { namespace lora { -class LoadedAdapter; +class LoraAdapter; } } // namespace onnxruntime @@ -48,7 +48,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - onnxruntime::InlinedVector active_adapters_; + onnxruntime::InlinedVector active_adapters_; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index c061204b6e4ee..b0ea7f7c9d843 100644 --- 
a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -67,7 +67,7 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, const _In_ OrtLoraAdapter* adapter) { API_IMPL_BEGIN - auto* lora_adapter = reinterpret_cast(adapter); + auto* lora_adapter = reinterpret_cast(adapter); options->active_adapters_.push_back(lora_adapter); return nullptr; API_IMPL_END diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 85bad75e36141..0a037d1acb256 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -824,7 +824,7 @@ void CheckAndAdjustForLora(const OrtRunOptions* run_options, gsl::span& inputs) { if (!run_options->active_adapters_.empty()) { size_t total_lora_params = 0; - for (const lora::LoadedAdapter* ad : run_options->active_adapters_) { + for (const lora::LoraAdapter* ad : run_options->active_adapters_) { total_lora_params += ad->GetParamNum(); } @@ -833,8 +833,8 @@ void CheckAndAdjustForLora(const OrtRunOptions* run_options, std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); - for (const lora::LoadedAdapter* ad : run_options->active_adapters_) { - ad->OutputLoadedAdaptersParameters(std::back_inserter(input_names_with_lora), + for (const lora::LoraAdapter* ad : run_options->active_adapters_) { + ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), std::back_inserter(input_with_lora)); } diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 454ec3ad2b6c2..c517f5dbe1055 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. 
#include "lora_adapters.h" -#include "lora_format_utils.h" +#include "adapter_format_utils.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" @@ -14,34 +14,34 @@ namespace onnxruntime { namespace lora { -LoadedAdapter::Param::Param(OrtValue ort_value_mapped) noexcept +LoraAdapter::Param::Param(OrtValue ort_value_mapped) noexcept : ort_value_mapped_(std::move(ort_value_mapped)) {} -LoadedAdapter::Param::Param(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept +LoraAdapter::Param::Param(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept : ort_value_mapped_(std::move(ort_value_mapped)), ort_value_device_(std::move(ort_value_device)) { } -void LoadedAdapter::Load(const std::filesystem::path& file_path) { - auto buffer = utils::LoadLoraAdapterBytes(file_path); +void LoraAdapter::Load(const std::filesystem::path& file_path) { + auto buffer = adapters::utils::LoadLoraAdapterBytes(file_path); Load(std::move(buffer)); } -void LoadedAdapter::Load(std::vector buffer) { - adapter_ = utils::ValidateAndGetAdapterFromBytes(buffer); +void LoraAdapter::Load(std::vector buffer) { + adapter_ = adapters::utils::ValidateAndGetAdapterFromBytes(buffer); buffer_.emplace(std::move(buffer)); InitializeParamsValues(); } -void LoadedAdapter::MemoryMap(const std::filesystem::path& file_path) { - auto [mapped_memory, file_size] = utils::MemoryMapAdapterFile(file_path); +void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { + auto [mapped_memory, file_size] = adapters::utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); - adapter_ = utils::ValidateAndGetAdapterFromBytes(u8_span); + adapter_ = adapters::utils::ValidateAndGetAdapterFromBytes(u8_span); buffer_.emplace(std::move(mapped_memory), file_size); InitializeParamsValues(); } -void LoadedAdapter::InitializeParamsValues() { +void LoraAdapter::InitializeParamsValues() { if (adapter_ == nullptr) { 
ORT_THROW("Adapter is not loaded yet."); } @@ -50,14 +50,14 @@ void LoadedAdapter::InitializeParamsValues() { InlinedHashMap params_values; params_values.reserve(params->size()); for (const auto* param : *params) { - auto [name, ort_value] = utils::CreateOrtValueOverLoraParameter(*param); + auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); Param lora_param(std::move(ort_value)); params_values.emplace(std::move(name), std::move(lora_param)); } params_values_.swap(params_values); } -size_t LoadedAdapter::GetBufferSize() const { +size_t LoraAdapter::GetBufferSize() const { if (std::holds_alternative(buffer_)) { return std::get<1>(buffer_).file_size_; } else if (std::holds_alternative(buffer_)) { @@ -72,7 +72,7 @@ size_t LoadedAdapter::GetBufferSize() const { ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN - auto lora_adapter = std::make_unique(); + auto lora_adapter = std::make_unique(); // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) lora_adapter->Load(adapter_file_path); *adapter = reinterpret_cast(lora_adapter.release()); @@ -81,5 +81,5 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_pa } ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { - delete reinterpret_cast(adapter); + delete reinterpret_cast(adapter); } diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 47aaff09ac318..8969dff6fcaf9 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -22,17 +22,17 @@ namespace lora { /// /// Container to hold and access Lora Parameters /// -class LoadedAdapter { +class LoraAdapter { public: - LoadedAdapter() = default; - explicit LoadedAdapter(AllocatorPtr device_allocator) + LoraAdapter() = default; + explicit LoraAdapter(AllocatorPtr 
device_allocator) : device_allocator_(std::move(device_allocator)) {} - ~LoadedAdapter() = default; - LoadedAdapter(const LoadedAdapter&) = delete; - LoadedAdapter& operator=(const LoadedAdapter&) = delete; + ~LoraAdapter() = default; + LoraAdapter(const LoraAdapter&) = delete; + LoraAdapter& operator=(const LoraAdapter&) = delete; - LoadedAdapter(LoadedAdapter&&) = default; - LoadedAdapter& operator=(LoadedAdapter&&) = default; + LoraAdapter(LoraAdapter&&) = default; + LoraAdapter& operator=(LoraAdapter&&) = default; /// /// Represents a named lora parameter (tensor) @@ -122,7 +122,7 @@ class LoadedAdapter { /// output iterator that accepts const char* /// output iterator that accepts const OrtValue* template - void OutputLoadedAdaptersParameters(NamesOutputIter names_out, + void OutputAdapterParameters(NamesOutputIter names_out, TensorOutputIter tensor_out) const { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index c3cfe2c97ae95..98ddf0027b86d 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -31,6 +31,43 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice: else: raise Exception("Unsupported device type: " + device_type) +class Adapter: + """ + Instances of this class are used to represent adapter information + obtained from read_adapter(). + """ + def __init__(self, adapter): + self._adapter = adapter + + @staticmethod + def read_adapter(file_path: os.PathLike) -> Adapter: + return Adapter(C.read_adapter(file_path)) + + @staticmethod + def export_adapter(file_path: os.PathLike, adapter_version: int, model_version: int, + params: dict[str, Sequence[Any]]): + """ + This function takes in the parameters and writes a file at the specified location + in onnxruntime adapter format containing Lora parameters. 
+ :param file_path: absolute path for the adapter + :param adapter_version: the version of the adapter + :param model_version: the version of the model this adapter is being created + :param params: a dictionary of string -> numpy array containing adapter parameters + """ + C.export_adapter(file_path, adapter_version, model_version, params) + + def get_format_version(self): + return self._adapter.get_format_version() + + def get_adapter_version(self): + return self._adapter.get_format_version() + + def get_model_version(self): + return self._adapter.get_model_version() + + def get_parameters(self) -> dict[str, Sequence[Any]]: + return self._adapter.get_parameters() + def check_and_normalize_provider_args( providers: Sequence[str | tuple[str, dict[Any, Any]]] | None, diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index d843a8fbf03ab..d38f8e64768f5 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -163,7 +163,7 @@ py::dtype ConstructDType(int32_t onnx_type) { } // namespace -void AddAdapterMethods(pybind11::module& m) { +void addAdapterMethods(pybind11::module& m) { m.def( "export_adapter", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& adapter_parameters) { std::ofstream file(file_name, std::ios::binary); @@ -202,9 +202,9 @@ void AddAdapterMethods(pybind11::module& m) { }; gsl::span shape_span{reinterpret_cast(np_array.shape()), - static_cast(np_array.ndim())}; + narrow(np_array.ndim())}; gsl::span data_span{reinterpret_cast(np_array.data()), - static_cast(np_array.nbytes())}; + narrow(np_array.nbytes())}; format_builder.AddParameter(param_name, static_cast(onnx_element_type), shape_span, data_span); @@ -218,7 +218,7 @@ void AddAdapterMethods(pybind11::module& m) { ORT_THROW("Failed to flush :", file_name, " on close"); } }, - "Save adapter parameters into a lora file format. 
"); + "Save adapter parameters into a lora file format."); class PyAdapter { public: @@ -248,45 +248,40 @@ void AddAdapterMethods(pybind11::module& m) { py::dict parameters_; }; - py::class_ adapter_binding(m, "LoraAdapter"); - adapter_binding.def(py::init()); - adapter_binding.def("get_format_version", [](PyAdapter* py_adapter) -> int { - return py_adapter->FormatVersion(); - }); - adapter_binding.def("get_adapter_version", [](PyAdapter* py_adapter) -> int { - return py_adapter->AdapterVersion(); - }); - adapter_binding.def("get_model_version", [](PyAdapter* py_adapter) -> int { - return py_adapter->ModelVersion(); - }); - adapter_binding.def("get_arameters", [](PyAdapter* py_adapter) -> py::dict { - return py_adapter->GetParameters(); - }); - - m.def("read_adapter", [](const std::string& file_name) -> std::unique_ptr { - lora::LoadedAdapter adapter; - adapter.MemoryMap(file_name); - - auto [begin, end] = adapter.GetParamIterators(); - py::dict params; - for (; begin != end; ++begin) { - const auto& [name, param] = *begin; - const auto& tensor = param.GetMapped().Get(); - - const auto onnx_type = tensor.GetElementType(); - const auto size_bytes = tensor.SizeInBytes(); - - py::dtype dtype = ConstructDType(onnx_type); - py::array npy_array(dtype, tensor.Shape().GetDims()); - ORT_ENFORCE(npy_array.size(), tensor.Shape().Size()); - memcpy_s(npy_array.mutable_data(), size_bytes, tensor.DataRaw(), size_bytes); - params[py::str(name)] = std::move(npy_array); - } + m.def( + "read_adapter", [](const std::string& file_name) -> std::unique_ptr { + lora::LoraAdapter adapter; + adapter.MemoryMap(file_name); + + auto [begin, end] = adapter.GetParamIterators(); + py::dict params; + for (; begin != end; ++begin) { + const auto& [name, param] = *begin; + const auto& tensor = param.GetMapped().Get(); + + const auto onnx_type = tensor.GetElementType(); + const auto size_bytes = tensor.SizeInBytes(); + + py::dtype dtype = ConstructDType(onnx_type); + // No pointer, memory is 
allocated by array + py::array npy_array(dtype, tensor.Shape().GetDims()); + ORT_ENFORCE(npy_array.size(), tensor.Shape().Size()); + memcpy_s(npy_array.mutable_data(), size_bytes, tensor.DataRaw(), size_bytes); + params[py::str(name)] = std::move(npy_array); + } - auto py_adapter = std::make_unique(adapter.FormatVersion(), adapter.AdapterVersion(), - adapter.ModelVersion(), std::move(params)); - return py_adapter; - }); + auto py_adapter = std::make_unique(adapter.FormatVersion(), adapter.AdapterVersion(), + adapter.ModelVersion(), std::move(params)); + return py_adapter; + }, + "The function returns an instance of the class that contains a dictionary of name -> numpy arrays"); + + py::class_ adapter_binding(m, "Adapter"); + adapter_binding.def(py::init()) + .def("get_format_version", &PyAdapter::GetParameters) + .def("get_adapter_version", &PyAdapter::AdapterVersion) + .def("get_model_version", &PyAdapter::ModelVersion) + .def("get_parameters", &PyAdapter::GetParameters); } } // namespace python diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 47b8d75f22aea..1d0dc0e1be02e 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -2243,6 +2243,7 @@ bool CreateInferencePybindStateModule(py::module& m) { addOrtValueMethods(m); addSparseTensorMethods(m); addIoBindingMethods(m); + addAdapterMethods(m); #if !defined(__APPLE__) && !defined(ORT_MINIMAL_BUILD) if (!InitProvidersSharedLibrary()) { diff --git a/onnxruntime/python/onnxruntime_pybind_state.h b/onnxruntime/python/onnxruntime_pybind_state.h index d3cf40609d17b..9c6f97b407fc8 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.h +++ b/onnxruntime/python/onnxruntime_pybind_state.h @@ -9,7 +9,7 @@ namespace python { void addGlobalMethods(py::module& m, Environment& env); void addObjectMethods(py::module& m, Environment& env); void addOrtValueMethods(pybind11::module& m); -void 
AddAdapterMethods(pybind11::module& m); +void addAdapterMethods(pybind11::module& m); } // namespace python } // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 4d6e411defae3..5cff369e9ee43 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -394,6 +394,8 @@ void addIoBindingMethods(pybind11::module& m); void addSparseTensorMethods(pybind11::module& m); +void addAdapterMethods(pybind11::module& m); + void addGlobalSchemaFunctions(pybind11::module& m); void addOpKernelSubmodule(pybind11::module& m); diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 13a18e33f01ba..c5f9138f280dc 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -78,7 +78,7 @@ struct ReadAndValidateData { } }; -auto verify_load = [](const lora::LoadedAdapter& adapter) { +auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_EQ(kAdapterVersion, adapter.AdapterVersion()); ASSERT_EQ(kModelVersion, adapter.ModelVersion()); @@ -90,7 +90,7 @@ auto verify_load = [](const lora::LoadedAdapter& adapter) { names.reserve(param_num); ort_values.reserve(param_num); - adapter.OutputLoadedAdaptersParameters(std::back_inserter(names), std::back_inserter(ort_values)); + adapter.OutputAdapterParameters(std::back_inserter(names), std::back_inserter(ort_values)); ASSERT_EQ(param_num, names.size()); ASSERT_EQ(param_num, ort_values.size()); @@ -164,7 +164,7 @@ template struct TestDataType { void operator()() const { const auto test_params = GenerateTestParameters()(); - lora::LoadedAdapter lora_adapter; + lora::LoraAdapter lora_adapter; lora_adapter.Load(std::move(test_params)); verify_load(lora_adapter); } diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index feabd648f8385..903034d2bc062 
100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1824,6 +1824,16 @@ def test_multiple_devices(self): device1_session.run(output_names=["Plus214_Output_0"], input_feed=image) device0_session.run(output_names=["Plus214_Output_0"], input_feed=image) + def test_adater_export_read(self): + adapter_version = 1 + model_version = 1 + exported_adapter_file = "test_adapter.onnx_adapter" + + values = np.array( # noqa: N806 + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.float) + + + os.remove(exported_adapter_file) if __name__ == "__main__": unittest.main(verbosity=1) From b5f3633adddaef22247cc12787e1a0919673bbb8 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 10 Sep 2024 14:55:04 -0700 Subject: [PATCH 38/84] Lint --- .../core/session/onnxruntime_cxx_api.h | 4 +- onnxruntime/core/session/onnxruntime_c_api.cc | 2 +- .../lora/adapter_format/adapter_schema.fbs.h | 171 +++++++++--------- .../lora/adapter_format/compile_schema.py | 2 + onnxruntime/lora/adapter_format_utils.cc | 4 +- onnxruntime/lora/adapter_format_utils.h | 2 +- onnxruntime/lora/adapter_format_version.h | 2 +- onnxruntime/lora/lora_adapters.h | 2 +- .../onnxruntime_inference_collection.py | 11 +- .../test/python/onnxruntime_test_python.py | 11 +- 10 files changed, 108 insertions(+), 103 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index d22051ded78ef..4934ff97a857b 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -740,7 +740,7 @@ struct CustomOpDomain : detail::Base { /// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file struct LoraAdapter : detail::Base { /// \brief Wraps OrtApi::CreateLoraAdapter - /// + /// /// The function attempts to load the adapter from the specified file /// \param absolute_adapter_path The 
absolute path to the Lora adapter /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still @@ -784,7 +784,7 @@ struct RunOptions : detail::Base { * * Wraps OrtApi::RunOptionsSetLoraAdapterActive * \param adapter The LoraAdapter to be used as the active adapter - */ + */ RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); }; diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 0a037d1acb256..296e729fc5bfb 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -835,7 +835,7 @@ void CheckAndAdjustForLora(const OrtRunOptions* run_options, for (const lora::LoraAdapter* ad : run_options->active_adapters_) { ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), - std::back_inserter(input_with_lora)); + std::back_inserter(input_with_lora)); } input_names = gsl::make_span(input_names_with_lora); diff --git a/onnxruntime/lora/adapter_format/adapter_schema.fbs.h b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h index b361a4e35f465..192ef6a5a4c80 100644 --- a/onnxruntime/lora/adapter_format/adapter_schema.fbs.h +++ b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h @@ -1,6 +1,5 @@ // automatically generated by the FlatBuffers compiler, do not modify - #ifndef FLATBUFFERS_GENERATED_ADAPTERSCHEMA_ONNXRUNTIME_ADAPTERS_H_ #define FLATBUFFERS_GENERATED_ADAPTERSCHEMA_ONNXRUNTIME_ADAPTERS_H_ @@ -9,9 +8,9 @@ // Ensure the included flatbuffers.h is the same version as when this file was // generated, otherwise it may not be compatible. 
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && - FLATBUFFERS_VERSION_MINOR == 5 && - FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); + FLATBUFFERS_VERSION_MINOR == 5 && + FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); namespace onnxruntime { namespace adapters { @@ -50,60 +49,58 @@ enum class TensorDataType : int32_t { inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ - }; + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ}; return values; } -inline const char * const *EnumNamesTensorDataType() { - static const char * const names[22] = { - "UNDEFINED", - "FLOAT", - "UINT8", - "INT8", - "UINT16", - "INT16", - "INT32", - "INT64", - "STRING", - "BOOL", - "FLOAT16", - "DOUBLE", - "UINT32", - "UINT64", - "COMPLEX64", - "COMPLEX128", - "BFLOAT16", - 
"FLOAT8E4M3FN", - "FLOAT8E4M3FNUZ", - "FLOAT8E5M2", - "FLOAT8E5M2FNUZ", - nullptr - }; +inline const char* const* EnumNamesTensorDataType() { + static const char* const names[22] = { + "UNDEFINED", + "FLOAT", + "UINT8", + "INT8", + "UINT16", + "INT16", + "INT32", + "INT64", + "STRING", + "BOOL", + "FLOAT16", + "DOUBLE", + "UINT32", + "UINT64", + "COMPLEX64", + "COMPLEX128", + "BFLOAT16", + "FLOAT8E4M3FN", + "FLOAT8E4M3FNUZ", + "FLOAT8E5M2", + "FLOAT8E5M2FNUZ", + nullptr}; return names; } -inline const char *EnumNameTensorDataType(TensorDataType e) { +inline const char* EnumNameTensorDataType(TensorDataType e) { if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; @@ -117,19 +114,19 @@ struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { VT_DATA_TYPE = 8, VT_RAW_DATA = 10 }; - const ::flatbuffers::String *name() const { - return GetPointer(VT_NAME); + const ::flatbuffers::String* name() const { + return GetPointer(VT_NAME); } - const ::flatbuffers::Vector *dims() const { - return GetPointer *>(VT_DIMS); + const ::flatbuffers::Vector* dims() const { + return GetPointer*>(VT_DIMS); } onnxruntime::adapters::TensorDataType data_type() const { return static_cast(GetField(VT_DATA_TYPE, 0)); } - const ::flatbuffers::Vector *raw_data() const { - return GetPointer *>(VT_RAW_DATA); + const ::flatbuffers::Vector* raw_data() const { + return GetPointer*>(VT_RAW_DATA); } - bool Verify(::flatbuffers::Verifier &verifier) const { + bool Verify(::flatbuffers::Verifier& verifier) const { return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && verifier.VerifyString(name()) && @@ -144,7 +141,7 @@ struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { struct ParameterBuilder { typedef Parameter Table; - ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::FlatBufferBuilder& fbb_; 
::flatbuffers::uoffset_t start_; void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { fbb_.AddOffset(Parameter::VT_NAME, name); @@ -158,8 +155,8 @@ struct ParameterBuilder { void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); } - explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ::flatbuffers::Offset Finish() { @@ -170,7 +167,7 @@ struct ParameterBuilder { }; inline ::flatbuffers::Offset CreateParameter( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, onnxruntime::adapters::TensorDataType data_type = onnxruntime::adapters::TensorDataType::UNDEFINED, @@ -184,14 +181,16 @@ inline ::flatbuffers::Offset CreateParameter( } inline ::flatbuffers::Offset CreateParameterDirect( - ::flatbuffers::FlatBufferBuilder &_fbb, - const char *name = nullptr, - const std::vector *dims = nullptr, + ::flatbuffers::FlatBufferBuilder& _fbb, + const char* name = nullptr, + const std::vector* dims = nullptr, onnxruntime::adapters::TensorDataType data_type = onnxruntime::adapters::TensorDataType::UNDEFINED, - const std::vector *raw_data = nullptr) { + const std::vector* raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; - if (raw_data) { _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); } + if (raw_data) { + _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); + } auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; return onnxruntime::adapters::CreateParameter( _fbb, @@ -218,10 +217,10 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { int32_t model_version() const { return GetField(VT_MODEL_VERSION, 0); } - const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { - return GetPointer> *>(VT_PARAMETERS); + const ::flatbuffers::Vector<::flatbuffers::Offset>* parameters() const { + return GetPointer>*>(VT_PARAMETERS); } - bool Verify(::flatbuffers::Verifier &verifier) const { + bool Verify(::flatbuffers::Verifier& verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FORMAT_VERSION, 4) && VerifyField(verifier, VT_ADAPTER_VERSION, 4) && @@ -235,7 +234,7 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { struct AdapterBuilder { typedef Adapter Table; - ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::FlatBufferBuilder& fbb_; ::flatbuffers::uoffset_t start_; void add_format_version(int32_t format_version) { fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); @@ -249,8 +248,8 @@ struct AdapterBuilder { void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); } - explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ::flatbuffers::Offset Finish() { @@ -261,7 +260,7 @@ struct AdapterBuilder { }; inline ::flatbuffers::Offset CreateAdapter( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, @@ -275,11 +274,11 @@ inline ::flatbuffers::Offset CreateAdapter( } inline ::flatbuffers::Offset CreateAdapterDirect( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t 
format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + const std::vector<::flatbuffers::Offset>* parameters = nullptr) { auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; return onnxruntime::adapters::CreateAdapter( _fbb, @@ -289,46 +288,46 @@ inline ::flatbuffers::Offset CreateAdapterDirect( parameters__); } -inline const onnxruntime::adapters::Adapter *GetAdapter(const void *buf) { +inline const onnxruntime::adapters::Adapter* GetAdapter(const void* buf) { return ::flatbuffers::GetRoot(buf); } -inline const onnxruntime::adapters::Adapter *GetSizePrefixedAdapter(const void *buf) { +inline const onnxruntime::adapters::Adapter* GetSizePrefixedAdapter(const void* buf) { return ::flatbuffers::GetSizePrefixedRoot(buf); } -inline const char *AdapterIdentifier() { +inline const char* AdapterIdentifier() { return "GAIL"; } -inline bool AdapterBufferHasIdentifier(const void *buf) { +inline bool AdapterBufferHasIdentifier(const void* buf) { return ::flatbuffers::BufferHasIdentifier( buf, AdapterIdentifier()); } -inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { +inline bool SizePrefixedAdapterBufferHasIdentifier(const void* buf) { return ::flatbuffers::BufferHasIdentifier( buf, AdapterIdentifier(), true); } inline bool VerifyAdapterBuffer( - ::flatbuffers::Verifier &verifier) { + ::flatbuffers::Verifier& verifier) { return verifier.VerifyBuffer(AdapterIdentifier()); } inline bool VerifySizePrefixedAdapterBuffer( - ::flatbuffers::Verifier &verifier) { + ::flatbuffers::Verifier& verifier) { return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); } inline void FinishAdapterBuffer( - ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::FlatBufferBuilder& fbb, ::flatbuffers::Offset root) { fbb.Finish(root, AdapterIdentifier()); } inline void FinishSizePrefixedAdapterBuffer( - ::flatbuffers::FlatBufferBuilder &fbb, + 
::flatbuffers::FlatBufferBuilder& fbb, ::flatbuffers::Offset root) { fbb.FinishSizePrefixed(root, AdapterIdentifier()); } diff --git a/onnxruntime/lora/adapter_format/compile_schema.py b/onnxruntime/lora/adapter_format/compile_schema.py index 48090c2c2f7d0..4536c48391dda 100644 --- a/onnxruntime/lora/adapter_format/compile_schema.py +++ b/onnxruntime/lora/adapter_format/compile_schema.py @@ -8,6 +8,7 @@ SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): # run flatc to generate C++ code cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] @@ -49,5 +50,6 @@ def main(): if "cpp" in languages: generate_cpp(flatc, schema_path) + if __name__ == "__main__": main() diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 1d6f3e3da98cb..19781e3edf8e7 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -149,10 +149,10 @@ gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { auto fbs_params = builder_.CreateVector(params_); auto fbs_adapter = CreateAdapter(builder_, kAdapterFormatVersion, adapter_version, - model_version, fbs_params); + model_version, fbs_params); builder_.Finish(fbs_adapter, AdapterIdentifier()); } } // namespace utils -} // namespace lora +} // namespace adapters } // namespace onnxruntime diff --git a/onnxruntime/lora/adapter_format_utils.h b/onnxruntime/lora/adapter_format_utils.h index 95263a0a3e128..922b88f8443c1 100644 --- a/onnxruntime/lora/adapter_format_utils.h +++ b/onnxruntime/lora/adapter_format_utils.h @@ -134,5 +134,5 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); } // namespace utils -} // namespace lora +} // namespace 
adapters } // namespace onnxruntime diff --git a/onnxruntime/lora/adapter_format_version.h b/onnxruntime/lora/adapter_format_version.h index a636911e36087..d653492c6db46 100644 --- a/onnxruntime/lora/adapter_format_version.h +++ b/onnxruntime/lora/adapter_format_version.h @@ -29,5 +29,5 @@ inline bool IsAdapterFormatVersionSupported(const int lora_format_version) { return it != kSupportedAdapterFormatVersions.cend(); } -} // namespace lora +} // namespace adapters } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 8969dff6fcaf9..a9728a89ecbcd 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -123,7 +123,7 @@ class LoraAdapter { /// output iterator that accepts const OrtValue* template void OutputAdapterParameters(NamesOutputIter names_out, - TensorOutputIter tensor_out) const { + TensorOutputIter tensor_out) const { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index 98ddf0027b86d..8dfa0e8d12562 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -31,21 +31,24 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice: else: raise Exception("Unsupported device type: " + device_type) + class Adapter: """ Instances of this class are used to represent adapter information obtained from read_adapter(). 
""" + def __init__(self, adapter): self._adapter = adapter @staticmethod def read_adapter(file_path: os.PathLike) -> Adapter: return Adapter(C.read_adapter(file_path)) - + @staticmethod - def export_adapter(file_path: os.PathLike, adapter_version: int, model_version: int, - params: dict[str, Sequence[Any]]): + def export_adapter( + file_path: os.PathLike, adapter_version: int, model_version: int, params: dict[str, Sequence[Any]] + ): """ This function takes in the parameters and writes a file at the specified location in onnxrunitme adapter format containing Lora parameters. @@ -64,7 +67,7 @@ def get_adapter_version(self): def get_model_version(self): return self._adapter.get_model_version() - + def get_parameters(self) -> dict[str, Sequence[Any]]: return self._adapter.get_parameters() diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 903034d2bc062..5fdd0a5e1c562 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -17,7 +17,7 @@ from helper import get_name import onnxruntime as onnxrt -from onnxruntime.capi.onnxruntime_pybind11_state import Fail, OrtValueVector, RunOptions +from onnxruntime.capi.onnxruntime_pybind11_state import Adapter, Fail, OrtValueVector, RunOptions # handle change from python 3.8 and on where loading a dll from the current directory needs to be explicitly allowed. 
if platform.system() == "Windows" and sys.version_info.major >= 3 and sys.version_info.minor >= 8: # noqa: YTT204 @@ -1829,11 +1829,12 @@ def test_adater_export_read(self): model_version = 1 exported_adapter_file = "test_adapter.onnx_adapter" - values = np.array( # noqa: N806 - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.float) - - + param_1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.float) # noqa: N806 + + pram_2 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.double) # noqa: N806 + os.remove(exported_adapter_file) + if __name__ == "__main__": unittest.main(verbosity=1) From 5c2e3b47cdcc6542c953d0e60664869369272cb4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 10 Sep 2024 16:20:41 -0700 Subject: [PATCH 39/84] Add rudimentary export/read test --- onnxruntime/__init__.py | 1 + .../onnxruntime_inference_collection.py | 2 +- .../test/python/onnxruntime_test_python.py | 19 ++++++++++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index e4d85c9d7b975..db74bf371d3f7 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -56,6 +56,7 @@ if import_capi_exception: raise import_capi_exception +from onnxruntime.capi.onnxruntime_inference_collection import Adapter # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import IOBinding # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice # noqa: F401 diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index 8dfa0e8d12562..fc8a5182a3d78 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -63,7 +63,7 @@ def get_format_version(self): return self._adapter.get_format_version() 
def get_adapter_version(self): - return self._adapter.get_format_version() + return self._adapter.get_adapter_version() def get_model_version(self): return self._adapter.get_model_version() diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 5fdd0a5e1c562..6b0362cf0ebae 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -17,7 +17,7 @@ from helper import get_name import onnxruntime as onnxrt -from onnxruntime.capi.onnxruntime_pybind11_state import Adapter, Fail, OrtValueVector, RunOptions +from onnxruntime.capi.onnxruntime_pybind11_state import Fail, OrtValueVector, RunOptions # handle change from python 3.8 and on where loading a dll from the current directory needs to be explicitly allowed. if platform.system() == "Windows" and sys.version_info.major >= 3 and sys.version_info.minor >= 8: # noqa: YTT204 @@ -1829,11 +1829,24 @@ def test_adater_export_read(self): model_version = 1 exported_adapter_file = "test_adapter.onnx_adapter" - param_1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.float) # noqa: N806 + param_1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=float) + param_2 = np.array([11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0], dtype=np.float64) - pram_2 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=np.double) # noqa: N806 + params = {"param_1": param_1, "param_2": param_2} + onnxrt.Adapter.export_adapter(exported_adapter_file, adapter_version, model_version, params) + + adapter = onnxrt.Adapter.read_adapter(exported_adapter_file) os.remove(exported_adapter_file) + self.assertEqual(adapter_version, adapter.get_adapter_version()) + self.assertEqual(model_version, adapter.get_model_version()) + + actual_params = adapter.get_parameters() + self.assertCountEqual(params, actual_params) + for key, value in actual_params.items(): + 
self.assertTrue(key in params) + expected_val = params.get(key) + np.testing.assert_allclose(expected_val, value) if __name__ == "__main__": From 135e52cd0949fd47515068f6886da21a598fbf45 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 10 Sep 2024 16:43:22 -0700 Subject: [PATCH 40/84] Update format signature --- onnxruntime/lora/adapter_format/adapter_schema.fbs | 2 +- onnxruntime/lora/adapter_format/adapter_schema.fbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/lora/adapter_format/adapter_schema.fbs b/onnxruntime/lora/adapter_format/adapter_schema.fbs index cb0e4415d1555..da1f8dcf5da92 100644 --- a/onnxruntime/lora/adapter_format/adapter_schema.fbs +++ b/onnxruntime/lora/adapter_format/adapter_schema.fbs @@ -48,4 +48,4 @@ table Adapter { } root_type Adapter; -file_identifier "GAIL"; +file_identifier "TORT"; diff --git a/onnxruntime/lora/adapter_format/adapter_schema.fbs.h b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h index 192ef6a5a4c80..c1d5412acbbde 100644 --- a/onnxruntime/lora/adapter_format/adapter_schema.fbs.h +++ b/onnxruntime/lora/adapter_format/adapter_schema.fbs.h @@ -297,7 +297,7 @@ inline const onnxruntime::adapters::Adapter* GetSizePrefixedAdapter(const void* } inline const char* AdapterIdentifier() { - return "GAIL"; + return "TORT"; } inline bool AdapterBufferHasIdentifier(const void* buf) { From 3f85bdbb468191d8654c65f4433e4fcd1fe3514f Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 11 Sep 2024 15:10:23 -0700 Subject: [PATCH 41/84] Add and test ort_value_from_bytes --- onnxruntime/__init__.py | 1 - onnxruntime/lora/lora_adapters.h | 16 +- .../onnxruntime_inference_collection.py | 90 ++++--- onnxruntime/python/onnxruntime_pybind_lora.cc | 220 +++++++++--------- .../python/onnxruntime_pybind_ortvalue.cc | 29 +++ .../python/onnxruntime_pybind_state.cc | 2 +- onnxruntime/python/onnxruntime_pybind_state.h | 2 +- .../python/onnxruntime_pybind_state_common.h | 2 +- 
.../test/python/onnxruntime_test_python.py | 72 ++++-- 9 files changed, 251 insertions(+), 183 deletions(-) diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index db74bf371d3f7..e4d85c9d7b975 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -56,7 +56,6 @@ if import_capi_exception: raise import_capi_exception -from onnxruntime.capi.onnxruntime_inference_collection import Adapter # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import IOBinding # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice # noqa: F401 diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index a9728a89ecbcd..6fe8a7d362e6e 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -43,7 +43,12 @@ class LoraAdapter { explicit Param(OrtValue ort_value_mapped) noexcept; Param(OrtValue ort_value_mapped, OrtValue ort_value_device) noexcept; - const OrtValue& GetMapped() const { + const OrtValue& GetMapped() const noexcept { + return ort_value_mapped_; + } + + // For python interface + OrtValue& GetMapped() noexcept { return ort_value_mapped_; } @@ -52,16 +57,21 @@ class LoraAdapter { OrtValue ort_value_device_; }; - using param_iterator = InlinedHashMap::const_iterator; + using param_const_iterator = InlinedHashMap::const_iterator; + using param_iterator = InlinedHashMap::iterator; /// /// Obtain a range of the iterators /// /// - std::pair GetParamIterators() const { + std::pair GetParamIterators() const { return std::make_pair(params_values_.cbegin(), params_values_.cend()); } + std::pair GetParamIterators() { + return std::make_pair(params_values_.begin(), params_values_.end()); + } + /// /// Load parameters into memory from an adapter file and validates its format. 
/// diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index fc8a5182a3d78..d03d8f33839a5 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -32,44 +32,44 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice: raise Exception("Unsupported device type: " + device_type) -class Adapter: - """ - Instances of this class are used to represent adapter information - obtained from read_adapter(). - """ - - def __init__(self, adapter): - self._adapter = adapter - - @staticmethod - def read_adapter(file_path: os.PathLike) -> Adapter: - return Adapter(C.read_adapter(file_path)) - - @staticmethod - def export_adapter( - file_path: os.PathLike, adapter_version: int, model_version: int, params: dict[str, Sequence[Any]] - ): - """ - This function takes in the parameters and writes a file at the specified location - in onnxrunitme adapter format containing Lora parameters. - :param file_path: absolute path for the adapter - :param adapter_version: the version of the adapter - :param model_version: the version of the model this adapter is being created - :param params: a dictionary of string -> numpy array containing adapter parameters - """ - C.export_adapter(file_path, adapter_version, model_version, params) - - def get_format_version(self): - return self._adapter.get_format_version() - - def get_adapter_version(self): - return self._adapter.get_adapter_version() - - def get_model_version(self): - return self._adapter.get_model_version() - - def get_parameters(self) -> dict[str, Sequence[Any]]: - return self._adapter.get_parameters() +# class Adapter: +# """ +# Instances of this class are used to represent adapter information +# obtained from read_adapter(). 
+# """ + +# def __init__(self, adapter): +# self._adapter = adapter + +# @staticmethod +# def read_adapter(file_path: os.PathLike) -> Adapter: +# return Adapter(C.read_adapter(file_path)) + +# @staticmethod +# def export_adapter( +# file_path: os.PathLike, adapter_version: int, model_version: int, params: dict[str, Sequence[Any]] +# ): +# """ +# This function takes in the parameters and writes a file at the specified location +# in onnxrunitme adapter format containing Lora parameters. +# :param file_path: absolute path for the adapter +# :param adapter_version: the version of the adapter +# :param model_version: the version of the model this adapter is being created +# :param params: a dictionary of string -> numpy array containing adapter parameters +# """ +# C.export_adapter(file_path, adapter_version, model_version, params) + +# def get_format_version(self): +# return self._adapter.get_format_version() + +# def get_adapter_version(self): +# return self._adapter.get_adapter_version() + +# def get_model_version(self): +# return self._adapter.get_model_version() + +# def get_parameters(self) -> dict[str, Sequence[Any]]: +# return self._adapter.get_parameters() def check_and_normalize_provider_args( @@ -751,6 +751,20 @@ def ortvalue_from_numpy(numpy_obj, device_type="cpu", device_id=0): numpy_obj if device_type.lower() == "cpu" else None, ) + @staticmethod + def ortvalue_from_bytes(data: bytes, shape: Sequence[int], onnx_element_type: int): + """ + This method creates an instance of OrtValue on top of the bytes object + No data copy is made and the lifespan of the resulting OrtValue should never + exceed the lifespan of bytes object + + :param data: bytes containing data. This is expected to be a flat array of bytes. + :param shape: shape of the tensor. shape*data_type_size must match the length of bytes + shape is expected to be a numpy array of int64. 
+ :param onnx_elemenet_type: a valid onnx TensorProto::DataType enum value + """ + return C.OrtValue.ortvalue_from_bytes(data, shape, onnx_element_type) + @staticmethod def ortvalue_from_shape_and_type(shape=None, element_type=None, device_type="cpu", device_id=0): """ diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index d38f8e64768f5..64244e443ea67 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -14,19 +14,18 @@ #include "core/framework/ort_value.h" #include "core/framework/tensor.h" +#include "lora/adapter_format_version.h" #include "lora/adapter_format_utils.h" #include "lora/lora_adapters.h" #include #include -#include namespace onnxruntime { namespace python { namespace py = pybind11; - namespace { // Check if the numpy dtype descr property has any of the known types @@ -161,127 +160,118 @@ py::dtype ConstructDType(int32_t onnx_type) { ORT_THROW("Unsupported type detected:", onnx_type); } -} // namespace +/// +/// Class that supports writing and reading adapters +/// in innxruntime format +/// +struct PyAdapterFormatReaderWriter { + PyAdapterFormatReaderWriter() = default; + PyAdapterFormatReaderWriter(int format_version, int adapter_version, + int model_version, + lora::LoraAdapter&& loaded_adapter, + py::dict&& params) + : format_version_(format_version), + adapter_version_(adapter_version), + model_version_(model_version), + loaded_adater_(std::move(loaded_adapter)), + parameters_(std::move(params)) {} + + int format_version_{adapters::kAdapterFormatVersion}; + int adapter_version_{0}; + int model_version_{0}; + // This container is used when reading the the file so + // OrtValue objects can be backed by it. 
Not exposed to Python + std::optional loaded_adater_; + // This is a dictionary of string -> OrtValue + // this is populated directly on write and + // built on top of the loaded_adapter on read + py::dict parameters_; +}; -void addAdapterMethods(pybind11::module& m) { - m.def( - "export_adapter", [](const std::string& file_name, int adapter_version, int model_version, const pybind11::dict& adapter_parameters) { - std::ofstream file(file_name, std::ios::binary); - if (file.fail()) { - ORT_THROW("Failed to open file:", file_name, " for writing."); - } +} // namespace - adapters::utils::AdapterFormatBuilder format_builder; - for (const auto& [n, arr] : adapter_parameters) { - const std::string param_name = py::str(n); - py::array np_array = arr.cast(); - - py::dtype arr_dtype = np_array.dtype(); - - // This is the element type as supported by numpy, - // however, we can have bfloat16 and float8 custom types defined. - auto ml_element_type = NumpyTypeToOnnxRuntimeTensorType(arr_dtype.num()); - auto onnx_element_type = static_cast( - ml_element_type->AsPrimitiveDataType()->GetDataType()); - - if (!ONNX_NAMESPACE::TensorProto_DataType_IsValid(onnx_element_type)) { - ORT_THROW("Unsupported tensor ONNX element type: ", onnx_element_type); - } - - switch (onnx_element_type) { - case ONNX_NAMESPACE::TensorProto_DataType_UINT16: - case ONNX_NAMESPACE::TensorProto_DataType_INT8: - case ONNX_NAMESPACE::TensorProto_DataType_UINT8: { - onnx_element_type = - static_cast(AdjustOnnxTypeIfNeeded(arr_dtype, - onnx_element_type)); - break; +/* */ +void addAdapterFormatMethods(pybind11::module& m) { + py::class_ adapter_binding(m, "Adapter"); + adapter_binding.def(py::init()) + .def_property_readonly( + "format_version", + [](const PyAdapterFormatReaderWriter* reader_writer) -> int { return reader_writer->format_version_; }, + R"pbdoc("Enables user to read format version stored in the file")pbdoc") + .def_property( + "adapter_version", + [](const PyAdapterFormatReaderWriter* 
reader_writer) -> int { return reader_writer->adapter_version_; }, + [](PyAdapterFormatReaderWriter* reader_writer, int adapter_version) -> void { reader_writer->adapter_version_ = adapter_version; }, + R"pbdoc("Enables user to read format version stored in the file")pbdoc") + .def_property( + "adapter_version", + [](const PyAdapterFormatReaderWriter* reader_writer) -> int { return reader_writer->adapter_version_; }, + [](PyAdapterFormatReaderWriter* reader_writer, int adapter_version) -> void { reader_writer->adapter_version_ = adapter_version; }, + R"pbdoc("Enables user to read/write adapter version stored in the file")pbdoc") + .def_property( + "model_version", + [](const PyAdapterFormatReaderWriter* reader_writer) -> int { return reader_writer->model_version_; }, + [](PyAdapterFormatReaderWriter* reader_writer, int model_version) -> void { reader_writer->model_version_ = model_version; }, + R"pbdoc("Enables user to read/write model version this adapter was created for")pbdoc") + .def_property( + "parameters", + [](const PyAdapterFormatReaderWriter* reader_writer) -> py::dict { return reader_writer->parameters_; }, + [](PyAdapterFormatReaderWriter* reader_writer, py::dict& parameters) -> void { + reader_writer->parameters_ = parameters; + }, + R"pbdoc("Enables user to read/write adapter version stored in the file")pbdoc") + .def( + "export_adapter", + [](const PyAdapterFormatReaderWriter* reader_writer, const std::string& file_name) { + std::ofstream file(file_name, std::ios::binary); + if (file.fail()) { + ORT_THROW("Failed to open file:", file_name, " for writing."); } - default: - break; - }; - - gsl::span shape_span{reinterpret_cast(np_array.shape()), - narrow(np_array.ndim())}; - gsl::span data_span{reinterpret_cast(np_array.data()), - narrow(np_array.nbytes())}; - - format_builder.AddParameter(param_name, static_cast(onnx_element_type), - shape_span, data_span); - } - auto format_span = format_builder.FinishWithSpan(adapter_version, model_version); - if 
(file.write(reinterpret_cast(format_span.data()), format_span.size()).fail()) { - ORT_THROW("Failed to write :", std::to_string(format_span.size()), " bytes to ", file_name); - } - - if (file.flush().fail()) { - ORT_THROW("Failed to flush :", file_name, " on close"); - } - }, - "Save adapter parameters into a lora file format."); - - class PyAdapter { - public: - PyAdapter(int format_version, int adapter_version, - int model_version, py::dict params) : format_version_(format_version), adapter_version_(adapter_version), model_version_(model_version), parameters_(std::move(params)) {} - int FormatVersion() const noexcept { - return format_version_; - } - - int AdapterVersion() const noexcept { - return adapter_version_; - } - - int ModelVersion() const noexcept { - return model_version_; - } + adapters::utils::AdapterFormatBuilder format_builder; + for (auto& [n, value] : reader_writer->parameters_) { + const std::string param_name = py::str(n); + const OrtValue* ort_value = value.cast(); + const Tensor& tensor = ort_value->Get(); + const auto data_span = + gsl::make_span(reinterpret_cast(tensor.DataRaw()), + tensor.SizeInBytes()); + format_builder.AddParameter( + param_name, static_cast(tensor.GetElementType()), + tensor.Shape().GetDims(), data_span); + } - py::dict GetParameters() const noexcept { - return parameters_; - } + auto format_span = format_builder.FinishWithSpan(reader_writer->adapter_version_, + reader_writer->model_version_); + if (file.write(reinterpret_cast(format_span.data()), format_span.size()).fail()) { + ORT_THROW("Failed to write :", std::to_string(format_span.size()), " bytes to ", file_name); + } - private: - int format_version_; - int adapter_version_; - int model_version_; - py::dict parameters_; - }; + if (file.flush().fail()) { + ORT_THROW("Failed to flush :", file_name, " on close"); + } + }, + "Save adapter parameters into a onnxruntime adapter file format.") + .def_static( + "read_adapter", [](const std::string& file_name) -> 
std::unique_ptr { + lora::LoraAdapter lora_adapter; + lora_adapter.Load(file_name); + + auto [begin, end] = lora_adapter.GetParamIterators(); + py::dict params; + for (; begin != end; ++begin) { + auto& [name, param] = *begin; + OrtValue& ort_value = param.GetMapped(); + params[py::str(name)] = py::cast(&ort_value); + } - m.def( - "read_adapter", [](const std::string& file_name) -> std::unique_ptr { - lora::LoraAdapter adapter; - adapter.MemoryMap(file_name); - - auto [begin, end] = adapter.GetParamIterators(); - py::dict params; - for (; begin != end; ++begin) { - const auto& [name, param] = *begin; - const auto& tensor = param.GetMapped().Get(); - - const auto onnx_type = tensor.GetElementType(); - const auto size_bytes = tensor.SizeInBytes(); - - py::dtype dtype = ConstructDType(onnx_type); - // No pointer, memory is allocated by array - py::array npy_array(dtype, tensor.Shape().GetDims()); - ORT_ENFORCE(npy_array.size(), tensor.Shape().Size()); - memcpy_s(npy_array.mutable_data(), size_bytes, tensor.DataRaw(), size_bytes); - params[py::str(name)] = std::move(npy_array); - } + auto py_adapter = std::make_unique( + lora_adapter.FormatVersion(), lora_adapter.AdapterVersion(), + lora_adapter.ModelVersion(), std::move(lora_adapter), std::move(params)); - auto py_adapter = std::make_unique(adapter.FormatVersion(), adapter.AdapterVersion(), - adapter.ModelVersion(), std::move(params)); - return py_adapter; - }, - "The function returns an instance of the class that contains a dictionary of name -> numpy arrays"); - - py::class_ adapter_binding(m, "Adapter"); - adapter_binding.def(py::init()) - .def("get_format_version", &PyAdapter::GetParameters) - .def("get_adapter_version", &PyAdapter::AdapterVersion) - .def("get_model_version", &PyAdapter::ModelVersion) - .def("get_parameters", &PyAdapter::GetParameters); + return py_adapter; + }, + "The function returns an instance of the class that contains a dictionary of name -> numpy arrays"); } } // namespace python diff --git 
a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index d76b9032afe73..795c0cb08c530 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -142,6 +142,35 @@ void addOrtValueMethods(pybind11::module& m) { throw std::runtime_error("Unsupported device: Cannot update the OrtValue on this device"); } }) + // Create an ortvalue bytes. The ort_value is created on top of the bytes + // and its life span must not exceed that of the bytes object + .def_static("ortvalue_from_bytes", [](py::bytes& bytes, py::array_t& shape, int32_t onnx_element_type) -> std::unique_ptr { + if (!ONNX_NAMESPACE::TensorProto_DataType_IsValid(onnx_element_type)) { + ORT_THROW("Not a valid ONNX Tensor data type: ", onnx_element_type); + } + + const auto shape_span = gsl::make_span(shape.data(), shape.size()); + const auto num_elements = std::accumulate(shape_span.begin(), shape_span.end(), 1LL, + std::multiplies()); + + const auto element_type = DataTypeImpl::TensorTypeFromONNXEnum(onnx_element_type) + ->GetElementType(); + + const auto element_size = element_type->Size(); + + const auto expected_bytes_size = element_size * num_elements; + const std::string_view view = bytes; + if (narrow(view.size()) != expected_bytes_size) { + ORT_THROW("Shape specifies: ", num_elements, " with total bytes size: ", expected_bytes_size, + " received: ", view.size()); + } + + auto cpu_allocator = GetAllocator(); + auto ort_value = std::make_unique(); + Tensor::InitOrtValue(element_type, TensorShape{shape_span}, + const_cast(view.data()), cpu_allocator->Info(), *ort_value); + return ort_value; + }) // Factory method to create an OrtValue (Tensor) from the given shape and element type with memory on the specified device // The memory is left uninitialized .def_static("ortvalue_from_shape_and_type", [](const std::vector& shape, py::object& element_type, const OrtDevice& device) { diff --git 
a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 1d0dc0e1be02e..7bd700bd91038 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -2243,7 +2243,7 @@ bool CreateInferencePybindStateModule(py::module& m) { addOrtValueMethods(m); addSparseTensorMethods(m); addIoBindingMethods(m); - addAdapterMethods(m); + addAdapterFormatMethods(m); #if !defined(__APPLE__) && !defined(ORT_MINIMAL_BUILD) if (!InitProvidersSharedLibrary()) { diff --git a/onnxruntime/python/onnxruntime_pybind_state.h b/onnxruntime/python/onnxruntime_pybind_state.h index 9c6f97b407fc8..5ec53c42f2706 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.h +++ b/onnxruntime/python/onnxruntime_pybind_state.h @@ -9,7 +9,7 @@ namespace python { void addGlobalMethods(py::module& m, Environment& env); void addObjectMethods(py::module& m, Environment& env); void addOrtValueMethods(pybind11::module& m); -void addAdapterMethods(pybind11::module& m); +void addAdapterFormatMethods(pybind11::module& m); } // namespace python } // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 5cff369e9ee43..1641e7d28990b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -394,7 +394,7 @@ void addIoBindingMethods(pybind11::module& m); void addSparseTensorMethods(pybind11::module& m); -void addAdapterMethods(pybind11::module& m); +void addAdapterFormatMethods(pybind11::module& m); void addGlobalSchemaFunctions(pybind11::module& m); diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 6b0362cf0ebae..c9d0e2fd33e97 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1389,6 +1389,17 @@ def 
test_session_with_ortvalue_input(ortvalue): # The constructed OrtValue should still be valid after being used in a session self.assertTrue(np.array_equal(ortvalue1.numpy(), numpy_arr_input)) + # test ort_value creation on top of the bytes + input_shape = numpy_arr_input.shape + numpy_arr_input_bytes = numpy_arr_input.tobytes() + float_tensor_data_type = 1 # TensorProto_DataType_FLOAT + ort_value_over_bytes = onnxrt.OrtValue.ortvalue_from_bytes( + numpy_arr_input_bytes, input_shape, float_tensor_data_type + ) + self.assertTrue(ort_value_over_bytes.is_tensor()) + self.assertEqual(float_tensor_data_type, ort_value_over_bytes.element_type()) + self.assertEqual([3, 2], ort_value_over_bytes.shape()) + if "CUDAExecutionProvider" in onnxrt.get_available_providers(): ortvalue2 = onnxrt.OrtValue.ortvalue_from_numpy(numpy_arr_input, "cuda", 0) self.assertEqual(ortvalue2.device_name(), "cuda") @@ -1824,29 +1835,44 @@ def test_multiple_devices(self): device1_session.run(output_names=["Plus214_Output_0"], input_feed=image) device0_session.run(output_names=["Plus214_Output_0"], input_feed=image) - def test_adater_export_read(self): - adapter_version = 1 - model_version = 1 - exported_adapter_file = "test_adapter.onnx_adapter" - - param_1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype=float) - param_2 = np.array([11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0], dtype=np.float64) - - params = {"param_1": param_1, "param_2": param_2} - - onnxrt.Adapter.export_adapter(exported_adapter_file, adapter_version, model_version, params) - - adapter = onnxrt.Adapter.read_adapter(exported_adapter_file) - os.remove(exported_adapter_file) - self.assertEqual(adapter_version, adapter.get_adapter_version()) - self.assertEqual(model_version, adapter.get_model_version()) - - actual_params = adapter.get_parameters() - self.assertCountEqual(params, actual_params) - for key, value in actual_params.items(): - self.assertTrue(key in params) - expected_val = 
params.get(key) - np.testing.assert_allclose(expected_val, value) + # def test_adater_export_read(self): + # adapter_version = 1 + # model_version = 1 + # exported_adapter_file = "test_adapter.onnx_adapter" + + # val1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + # # val2 = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + # # param_1 = np.array(val1, dtype=float) + # # param_2 = np.array(val2, dtype=np.float64) + + # types = [ + # # np.dtype((np.uint16, {"bfloat16" :(np.uint16, 0)})), + # # np.dtype((np.uint8, {"e4m3fn" :(np.uint8, 0)})), + # # np.dtype((np.uint8, {"e4m3fnuz" :(np.uint8, 0)})), + # # np.dtype((np.uint8, {"e5m2" :(np.uint8, 0)})), + # # np.dtype((np.uint8, {"e5m2fnuz" :(np.uint8, 0)})), + # float, + # np.float64] + + # params = {} + # for t in types: + # name = "param_" + str(len(params)) + # v = np.array(val1, dtype=t) + # params[name] = v + + # onnxrt.Adapter.export_adapter(exported_adapter_file, adapter_version, model_version, params) + + # adapter = onnxrt.Adapter.read_adapter(exported_adapter_file) + # os.remove(exported_adapter_file) + # self.assertEqual(adapter_version, adapter.get_adapter_version()) + # self.assertEqual(model_version, adapter.get_model_version()) + + # actual_params = adapter.get_parameters() + # self.assertCountEqual(params, actual_params) + # for key, value in actual_params.items(): + # self.assertTrue(key in params) + # expected_val = params.get(key) + # np.testing.assert_allclose(expected_val, value) if __name__ == "__main__": From afbe6faeceea3f82d1a24075de7d1a998c4132f7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 11 Sep 2024 17:36:58 -0700 Subject: [PATCH 42/84] AdapterFormat tests now pass --- onnxruntime/__init__.py | 1 + .../onnxruntime_inference_collection.py | 96 ++++++++++--------- onnxruntime/python/onnxruntime_pybind_lora.cc | 2 +- .../python/onnxruntime_pybind_ortvalue.cc | 22 ++--- .../test/python/onnxruntime_test_python.py | 87 ++++++++--------- 5 files changed, 103 insertions(+), 105 deletions(-) diff --git 
a/onnxruntime/__init__.py b/onnxruntime/__init__.py index e4d85c9d7b975..01d2f4e9a85f7 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -56,6 +56,7 @@ if import_capi_exception: raise import_capi_exception +from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import IOBinding # noqa: F401 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice # noqa: F401 diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index d03d8f33839a5..531ad5992c9cb 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -32,44 +32,50 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice: raise Exception("Unsupported device type: " + device_type) -# class Adapter: -# """ -# Instances of this class are used to represent adapter information -# obtained from read_adapter(). -# """ - -# def __init__(self, adapter): -# self._adapter = adapter - -# @staticmethod -# def read_adapter(file_path: os.PathLike) -> Adapter: -# return Adapter(C.read_adapter(file_path)) - -# @staticmethod -# def export_adapter( -# file_path: os.PathLike, adapter_version: int, model_version: int, params: dict[str, Sequence[Any]] -# ): -# """ -# This function takes in the parameters and writes a file at the specified location -# in onnxrunitme adapter format containing Lora parameters. 
-# :param file_path: absolute path for the adapter -# :param adapter_version: the version of the adapter -# :param model_version: the version of the model this adapter is being created -# :param params: a dictionary of string -> numpy array containing adapter parameters -# """ -# C.export_adapter(file_path, adapter_version, model_version, params) - -# def get_format_version(self): -# return self._adapter.get_format_version() - -# def get_adapter_version(self): -# return self._adapter.get_adapter_version() - -# def get_model_version(self): -# return self._adapter.get_model_version() - -# def get_parameters(self) -> dict[str, Sequence[Any]]: -# return self._adapter.get_parameters() +class AdapterFormat: + """ + This class is used to create adapter files + """ + + def __init__(self, adapter=None): + if adapter is None: + self._adapter = C.AdapterFormat() + else: + self._adapter = adapter + + @staticmethod + def read_adapter(file_path: os.PathLike) -> AdapterFormat: + return AdapterFormat(C.AdapterFormat.read_adapter(file_path)) + + def export_adapter(self, file_path: os.PathLike): + """ + This function writes a file at the specified location + in onnxrunitme adapter format containing Lora parameters. 
+ + :param file_path: absolute path for the adapter + """ + self._adapter.export_adapter(file_path) + + def get_format_version(self): + return self._adapter.format_version + + def set_adapter_version(self, adapter_version: int): + self._adapter.adapter_version = adapter_version + + def get_adapter_version(self): + return self._adapter.adapter_version + + def set_model_version(self, model_version: int): + self._adapter.model_version = model_version + + def get_model_version(self): + return self._adapter.model_version + + def set_parameters(self, params: dict[str, OrtValue]): + self._adapter.parameters = {k: v._ortvalue for k, v in params.items()} + + def get_parameters(self) -> dict[str, OrtValue]: + return {k: OrtValue(v) for k, v in self._adapter.parameters.items()} def check_and_normalize_provider_args( @@ -752,18 +758,18 @@ def ortvalue_from_numpy(numpy_obj, device_type="cpu", device_id=0): ) @staticmethod - def ortvalue_from_bytes(data: bytes, shape: Sequence[int], onnx_element_type: int): + def ortvalue_from_numpy_with_onnxtype(data: Sequence[int], onnx_element_type: int): """ - This method creates an instance of OrtValue on top of the bytes object + This method creates an instance of OrtValue on top of the numpy array No data copy is made and the lifespan of the resulting OrtValue should never - exceed the lifespan of bytes object + exceed the lifespan of bytes object. The API attempts to reinterpret + the data type which is expected to be the same size. This is useful + when we want to use an ONNX data type that is not supported by numpy. - :param data: bytes containing data. This is expected to be a flat array of bytes. - :param shape: shape of the tensor. shape*data_type_size must match the length of bytes - shape is expected to be a numpy array of int64. + :param data: numpy array. 
:param onnx_elemenet_type: a valid onnx TensorProto::DataType enum value """ - return C.OrtValue.ortvalue_from_bytes(data, shape, onnx_element_type) + return OrtValue(C.OrtValue.ortvalue_from_numpy_with_onnxtype(data, onnx_element_type), data) @staticmethod def ortvalue_from_shape_and_type(shape=None, element_type=None, device_type="cpu", device_id=0): diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 64244e443ea67..8c762ea2afa82 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -192,7 +192,7 @@ struct PyAdapterFormatReaderWriter { /* */ void addAdapterFormatMethods(pybind11::module& m) { - py::class_ adapter_binding(m, "Adapter"); + py::class_ adapter_binding(m, "AdapterFormat"); adapter_binding.def(py::init()) .def_property_readonly( "format_version", diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 795c0cb08c530..ae41ae68bde0c 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -142,33 +142,27 @@ void addOrtValueMethods(pybind11::module& m) { throw std::runtime_error("Unsupported device: Cannot update the OrtValue on this device"); } }) - // Create an ortvalue bytes. The ort_value is created on top of the bytes - // and its life span must not exceed that of the bytes object - .def_static("ortvalue_from_bytes", [](py::bytes& bytes, py::array_t& shape, int32_t onnx_element_type) -> std::unique_ptr { + // Create an ortvalue value on top of the numpy array, but interpret the data + // as a different type with the same element size. 
+ .def_static("ortvalue_from_numpy_with_onnxtype", [](py::array& data, int32_t onnx_element_type) -> std::unique_ptr { if (!ONNX_NAMESPACE::TensorProto_DataType_IsValid(onnx_element_type)) { ORT_THROW("Not a valid ONNX Tensor data type: ", onnx_element_type); } - const auto shape_span = gsl::make_span(shape.data(), shape.size()); - const auto num_elements = std::accumulate(shape_span.begin(), shape_span.end(), 1LL, - std::multiplies()); - const auto element_type = DataTypeImpl::TensorTypeFromONNXEnum(onnx_element_type) ->GetElementType(); const auto element_size = element_type->Size(); - - const auto expected_bytes_size = element_size * num_elements; - const std::string_view view = bytes; - if (narrow(view.size()) != expected_bytes_size) { - ORT_THROW("Shape specifies: ", num_elements, " with total bytes size: ", expected_bytes_size, - " received: ", view.size()); + if (narrow(data.itemsize()) != element_size) { + ORT_THROW("Items size in the incoming aray: ", data.itemsize(), + " specified by onnxtype: ", element_size); } auto cpu_allocator = GetAllocator(); auto ort_value = std::make_unique(); + const auto shape_span = gsl::make_span(data.shape(), data.ndim()); Tensor::InitOrtValue(element_type, TensorShape{shape_span}, - const_cast(view.data()), cpu_allocator->Info(), *ort_value); + const_cast(data.data()), cpu_allocator->Info(), *ort_value); return ort_value; }) // Factory method to create an OrtValue (Tensor) from the given shape and element type with memory on the specified device diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index c9d0e2fd33e97..b852d599251ec 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1391,14 +1391,11 @@ def test_session_with_ortvalue_input(ortvalue): # test ort_value creation on top of the bytes input_shape = numpy_arr_input.shape - numpy_arr_input_bytes = numpy_arr_input.tobytes() 
float_tensor_data_type = 1 # TensorProto_DataType_FLOAT - ort_value_over_bytes = onnxrt.OrtValue.ortvalue_from_bytes( - numpy_arr_input_bytes, input_shape, float_tensor_data_type - ) - self.assertTrue(ort_value_over_bytes.is_tensor()) - self.assertEqual(float_tensor_data_type, ort_value_over_bytes.element_type()) - self.assertEqual([3, 2], ort_value_over_bytes.shape()) + ort_value_with_type = onnxrt.OrtValue.ortvalue_from_numpy_with_onnxtype(numpy_arr_input, float_tensor_data_type) + self.assertTrue(ort_value_with_type.is_tensor()) + self.assertEqual(float_tensor_data_type, ort_value_with_type.element_type()) + self.assertEqual([3, 2], ort_value_with_type.shape()) if "CUDAExecutionProvider" in onnxrt.get_available_providers(): ortvalue2 = onnxrt.OrtValue.ortvalue_from_numpy(numpy_arr_input, "cuda", 0) @@ -1835,44 +1832,44 @@ def test_multiple_devices(self): device1_session.run(output_names=["Plus214_Output_0"], input_feed=image) device0_session.run(output_names=["Plus214_Output_0"], input_feed=image) - # def test_adater_export_read(self): - # adapter_version = 1 - # model_version = 1 - # exported_adapter_file = "test_adapter.onnx_adapter" - - # val1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - # # val2 = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - # # param_1 = np.array(val1, dtype=float) - # # param_2 = np.array(val2, dtype=np.float64) - - # types = [ - # # np.dtype((np.uint16, {"bfloat16" :(np.uint16, 0)})), - # # np.dtype((np.uint8, {"e4m3fn" :(np.uint8, 0)})), - # # np.dtype((np.uint8, {"e4m3fnuz" :(np.uint8, 0)})), - # # np.dtype((np.uint8, {"e5m2" :(np.uint8, 0)})), - # # np.dtype((np.uint8, {"e5m2fnuz" :(np.uint8, 0)})), - # float, - # np.float64] - - # params = {} - # for t in types: - # name = "param_" + str(len(params)) - # v = np.array(val1, dtype=t) - # params[name] = v - - # onnxrt.Adapter.export_adapter(exported_adapter_file, adapter_version, model_version, params) - - # adapter = onnxrt.Adapter.read_adapter(exported_adapter_file) - # 
os.remove(exported_adapter_file) - # self.assertEqual(adapter_version, adapter.get_adapter_version()) - # self.assertEqual(model_version, adapter.get_model_version()) - - # actual_params = adapter.get_parameters() - # self.assertCountEqual(params, actual_params) - # for key, value in actual_params.items(): - # self.assertTrue(key in params) - # expected_val = params.get(key) - # np.testing.assert_allclose(expected_val, value) + def test_adater_export_read(self): + adapter_version = 1 + model_version = 1 + exported_adapter_file = "test_adapter.onnx_adapter" + + float_data_type = 1 + int64_data_type = 7 + val = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + param_1 = np.array(val).astype(np.float32).reshape(5, 2) + param_2 = np.array(val).astype(np.int64).reshape(2, 5) + + ort_val_1 = onnxrt.OrtValue.ortvalue_from_numpy_with_onnxtype(param_1, float_data_type) + ort_val_2 = onnxrt.OrtValue.ortvalue_from_numpy_with_onnxtype(param_2, int64_data_type) + + params = {"param_1": ort_val_1, "param_2": ort_val_2} + + adapter_format = onnxrt.AdapterFormat() + adapter_format.set_adapter_version(adapter_version) + adapter_format.set_model_version(model_version) + adapter_format.set_parameters(params) + + adapter_format.export_adapter(exported_adapter_file) + + adapter_format_read = onnxrt.AdapterFormat.read_adapter(exported_adapter_file) + os.remove(exported_adapter_file) + + self.assertEqual(adapter_version, adapter_format_read.get_adapter_version()) + self.assertEqual(model_version, adapter_format_read.get_model_version()) + + actual_params = adapter_format_read.get_parameters() + self.assertCountEqual(params, actual_params) + for key, value in actual_params.items(): + self.assertTrue(key in params) + expected_val = params.get(key) + self.assertTrue(value.is_tensor()) + self.assertEqual(expected_val.element_type(), value.element_type()) + self.assertEqual(expected_val.shape(), value.shape()) + np.testing.assert_allclose(expected_val.numpy(), value.numpy()) if __name__ == "__main__": From 
1c3841fde137c26c218ef8a17dc4d989cf10f771 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 11 Sep 2024 19:34:44 -0700 Subject: [PATCH 43/84] Implement py::LoraAdapter, RunOptions and adjust run() --- onnxruntime/__init__.py | 1 + onnxruntime/lora/lora_adapters.h | 7 + .../onnxruntime_inference_collection.py | 4 +- onnxruntime/python/onnxruntime_pybind_lora.cc | 142 +----------------- .../python/onnxruntime_pybind_state.cc | 31 +++- 5 files changed, 45 insertions(+), 140 deletions(-) diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index 01d2f4e9a85f7..0e9a924bde4bb 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -23,6 +23,7 @@ from onnxruntime.capi._pybind_state import ExecutionMode # noqa: F401 from onnxruntime.capi._pybind_state import ExecutionOrder # noqa: F401 from onnxruntime.capi._pybind_state import GraphOptimizationLevel # noqa: F401 + from onnxruntime.capi._pybind_state import LoraAdapter # noqa: F401 from onnxruntime.capi._pybind_state import ModelMetadata # noqa: F401 from onnxruntime.capi._pybind_state import NodeArg # noqa: F401 from onnxruntime.capi._pybind_state import OrtAllocatorType # noqa: F401 diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 6fe8a7d362e6e..d8b5e56d5f861 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -52,6 +52,13 @@ class LoraAdapter { return ort_value_mapped_; } + const OrtValue& GetDeviceOrMapped() const noexcept { + if (ort_value_device_.IsAllocated()) { + return ort_value_device_; + } + return ort_value_mapped_; + } + private: OrtValue ort_value_mapped_; OrtValue ort_value_device_; diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index 531ad5992c9cb..d0304160dc68d 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -34,10 +34,10 @@ def 
get_ort_device_type(device_type: str, device_index) -> C.OrtDevice: class AdapterFormat: """ - This class is used to create adapter files + This class is used to create adapter files from python structures """ - def __init__(self, adapter=None): + def __init__(self, adapter=None) -> None: if adapter is None: self._adapter = C.AdapterFormat() else: diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 8c762ea2afa82..a98b8cb38a36a 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -27,139 +27,6 @@ namespace python { namespace py = pybind11; namespace { - -// Check if the numpy dtype descr property has any of the known types -// that is not supported natively by numpy arrays. -// For example: -// >>> bfloat16 = np.dtype((np.uint16, {"bfloat16": (np.uint16, 0)})) -// >>> print(bfloat16.descr) -// [('bfloat16', ' GetDescrPropertyString(const py::dtype& arr_dtype) { - std::optional custom_type; - try { - if (py::hasattr(arr_dtype, "descr")) { - auto descr = py::getattr(arr_dtype, "descr").cast(); - if (descr.size() > 0) { - auto item = descr[0].cast(); - if (item.size() > 0) { - custom_type = item[0].cast(); - } - } - } - } catch (const py::cast_error&) { - // Ignore the exception - PyErr_Clear(); - } - return custom_type; -} - -// bfloat16 = np.dtype((np.uint16, {"bfloat16": (np.uint16, 0)})) -py::dtype ConstructCustomDtype(int32_t npy_type, const std::string& custom_type_tag) { - py::dtype first_arg(npy_type); - - py::dict second_arg; - second_arg[py::str(custom_type_tag)] = py::make_tuple(first_arg, 0); - auto tuple = py::make_tuple(std::move(first_arg), std::move(second_arg)); - - py::dtype result{py::dtype::from_args(tuple)}; - return result; -} - -// Get mapped OnnxDataType from numpy dtype descriptior -// float4e2m1 unsupported at the moment -std::optional GetOnnxDataTypeFromCustomPythonDescr(const std::string& descr) { - static const 
std::unordered_map dtype_descr = { - {"bfloat16", ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16}, - {"e4m3fn", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN}, - {"e4m3fnuz", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FNUZ}, - {"e5m2", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2}, - {"e5m2fnuz", ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2FNUZ}, - {"int4", ONNX_NAMESPACE::TensorProto_DataType_INT4}, - {"uint4", ONNX_NAMESPACE::TensorProto_DataType_UINT4}, - }; - - auto hit = dtype_descr.find(descr); - if (hit == dtype_descr.end()) { - return std::nullopt; - } - - return hit->second; -} - -// If a custom type is discovered in numpy array we set the correct ONNX type. -int32_t AdjustOnnxTypeIfNeeded(const py::dtype& arr_dtype, int32_t base_type_from_array) { - auto descr = GetDescrPropertyString(arr_dtype); - if (descr.has_value()) { - auto adjusted_type = GetOnnxDataTypeFromCustomPythonDescr(*descr); - if (adjusted_type.has_value()) { - return *adjusted_type; - } - } - return base_type_from_array; -} - -std::optional FromOnnxTypeToNumpySupportedType(int32_t onnx_type) { - // Numpy supported types mapping - static std::unordered_map onnxtype_to_numpy{ - {ONNX_NAMESPACE::TensorProto_DataType_BOOL, NPY_BOOL}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT, NPY_FLOAT}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT16, NPY_FLOAT16}, - {ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, NPY_DOUBLE}, - {ONNX_NAMESPACE::TensorProto_DataType_INT8, NPY_INT8}, - {ONNX_NAMESPACE::TensorProto_DataType_UINT8, NPY_UINT8}, - {ONNX_NAMESPACE::TensorProto_DataType_INT16, NPY_INT16}, - {ONNX_NAMESPACE::TensorProto_DataType_UINT16, NPY_UINT16}, - {ONNX_NAMESPACE::TensorProto_DataType_INT32, NPY_INT}, - {ONNX_NAMESPACE::TensorProto_DataType_UINT32, NPY_UINT}, - {ONNX_NAMESPACE::TensorProto_DataType_INT64, NPY_LONGLONG}, - {ONNX_NAMESPACE::TensorProto_DataType_UINT64, NPY_ULONGLONG}, - {ONNX_NAMESPACE::TensorProto_DataType_STRING, NPY_STRING}, - }; - - auto hit = 
onnxtype_to_numpy.find(onnx_type); - if (hit == onnxtype_to_numpy.end()) - return std::nullopt; - - return hit->second; -} - -std::optional> GetCustomNumpyTypeFromOnnxType(int32_t onnx_data_type) { - static const std::unordered_map> onnxtype_to_custom_numpy_type = { - {ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16, {NPY_UINT16, "bfloat16"}}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN, {NPY_UINT8, "e4m3fn"}}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FNUZ, {NPY_UINT8, "e4m3fnuz"}}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2, {NPY_UINT8, "e5m2"}}, - {ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2FNUZ, {NPY_UINT8, "e5m2fnuz"}}, - {ONNX_NAMESPACE::TensorProto_DataType_INT4, {NPY_INT8, "int4"}}, - {ONNX_NAMESPACE::TensorProto_DataType_UINT4, {NPY_UINT8, "uint4"}}}; - - auto hit = onnxtype_to_custom_numpy_type.find(onnx_data_type); - if (hit == onnxtype_to_custom_numpy_type.end()) { - return std::nullopt; - } - - return hit->second; -} - -py::dtype ConstructDType(int32_t onnx_type) { - // check if the type maps to onnx custom type - auto custom_type = GetCustomNumpyTypeFromOnnxType(onnx_type); - if (custom_type.has_value()) { - return ConstructCustomDtype(custom_type->first, custom_type->second); - } - - auto npy_type = FromOnnxTypeToNumpySupportedType(onnx_type); - if (npy_type.has_value()) { - return py::dtype(*npy_type); - } - ORT_THROW("Unsupported type detected:", onnx_type); -} - /// /// Class that supports writing and reading adapters /// in innxruntime format @@ -251,7 +118,8 @@ void addAdapterFormatMethods(pybind11::module& m) { ORT_THROW("Failed to flush :", file_name, " on close"); } }, - "Save adapter parameters into a onnxruntime adapter file format.") + R"pbdoc("Save adapter parameters into a onnxruntime adapter file format.)pbdoc") + .def_static( "read_adapter", [](const std::string& file_name) -> std::unique_ptr { lora::LoraAdapter lora_adapter; @@ -271,7 +139,11 @@ void addAdapterFormatMethods(pybind11::module& m) { return 
py_adapter; }, - "The function returns an instance of the class that contains a dictionary of name -> numpy arrays"); + R"pbdoc(The function returns an instance of the class that contains a dictionary of name -> numpy arrays)pbdoc"); + + py::class_ lora_adapter_binding(m, "LoraAdapter"); + lora_adapter_binding.def(py::init()) + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 7bd700bd91038..175f10baeb88c 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -31,6 +31,8 @@ #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/provider_bridge_ort.h" +#include "lora/lora_adapters.h" + #ifdef ENABLE_ATEN #include "contrib_ops/cpu/aten_ops/aten_op_executor.h" #endif @@ -1883,7 +1885,12 @@ RunOptions instance. The individual calls will exit gracefully and return an err return value; }, - R"pbdoc(Get a single run configuration value using the given configuration key.)pbdoc"); + R"pbdoc(Get a single run configuration value using the given configuration key.)pbdoc") + .def( + "set_adapter_active", [](RunOptions* options, lora::LoraAdapter* adapter) { + options->active_adapters_.push_back(adapter); + }, + R"pbdoc(Activates the specified lora adapter)pbdoc"); py::class_(m, "ModelMetadata", R"pbdoc(Pre-defined and custom metadata about the model. 
It is usually used to identify the model used to run the prediction and @@ -2004,7 +2011,25 @@ including arg name, arg type (contains both type and shape).)pbdoc") const std::map& pyfeeds, RunOptions* run_options = nullptr) -> py::list { NameMLValMap feeds; - feeds.reserve(pyfeeds.size()); + if (run_options != nullptr && !run_options->active_adapters_.empty()) { + size_t total_entries = pyfeeds.size(); + for (const auto* adapter : run_options->active_adapters_) { + total_entries += adapter->GetParamNum(); + } + feeds.reserve(total_entries); + + // Append necessary inputs for active adapters + for (const auto* adapter : run_options->active_adapters_) { + auto [begin, end] = adapter->GetParamIterators(); + for (; begin != end; ++begin) { + const auto& [name, param] = *begin; + feeds.insert(std::make_pair(name, param.GetDeviceOrMapped())); + } + } + } else { + feeds.reserve(pyfeeds.size()); + } + for (const auto& feed : pyfeeds) { // No need to process 'None's sent in by the user // to feed Optional inputs in the graph. 
@@ -2018,7 +2043,7 @@ including arg name, arg type (contains both type and shape).)pbdoc") } CreateGenericMLValue(px.second, GetAllocator(), feed.first, feed.second, &ml_value); ThrowIfPyErrOccured(); - feeds.insert(std::make_pair(feed.first, ml_value)); + feeds.insert(std::make_pair(feed.first, std::move(ml_value))); } } From 887167240b1abbfc46f4ac86c8a41907be69ca4b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 12 Sep 2024 11:27:39 -0700 Subject: [PATCH 44/84] Address build issues --- cmake/onnxruntime_lora.cmake | 2 +- onnxruntime/core/session/onnxruntime_c_api.cc | 5 -- .../test/python/onnxruntime_test_python.py | 1 - .../test/testdata/lora/sample_weights.npz | Bin 0 -> 43766 bytes onnxruntime/test/testdata/lora/test_model.py | 50 ++++++++++++++++++ 5 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 onnxruntime/test/testdata/lora/sample_weights.npz create mode 100644 onnxruntime/test/testdata/lora/test_model.py diff --git a/cmake/onnxruntime_lora.cmake b/cmake/onnxruntime_lora.cmake index 3f99e230031ad..7ba48454d997e 100644 --- a/cmake/onnxruntime_lora.cmake +++ b/cmake/onnxruntime_lora.cmake @@ -10,7 +10,7 @@ file(GLOB onnxruntime_lora_srcs CONFIGURE_DEPENDS source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_lora_srcs}) onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs}) -onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers ${GSL_TARGET}) +onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers Boost::mp11 ${GSL_TARGET}) target_link_libraries(onnxruntime_lora onnxruntime_framework) if(onnxruntime_ENABLE_INSTRUMENT) diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 296e729fc5bfb..f6fca38b8750a 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -896,11 +896,6 @@ ORT_API_STATUS_IMPL(OrtApis::RunAsync, _Inout_ OrtSession* sess, _In_opt_ 
const auto output_name_span = gsl::make_span(output_names, output_names_len); auto output_span = gsl::make_span(output, output_names_len); - InlinedVector input_names_with_lora; - InlinedVector input_with_lora; - - CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); - return ToOrtStatus(session->RunAsync(run_options, input_names_span, input_span, diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index b852d599251ec..1d7399d19e947 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1390,7 +1390,6 @@ def test_session_with_ortvalue_input(ortvalue): self.assertTrue(np.array_equal(ortvalue1.numpy(), numpy_arr_input)) # test ort_value creation on top of the bytes - input_shape = numpy_arr_input.shape float_tensor_data_type = 1 # TensorProto_DataType_FLOAT ort_value_with_type = onnxrt.OrtValue.ortvalue_from_numpy_with_onnxtype(numpy_arr_input, float_tensor_data_type) self.assertTrue(ort_value_with_type.is_tensor()) diff --git a/onnxruntime/test/testdata/lora/sample_weights.npz b/onnxruntime/test/testdata/lora/sample_weights.npz new file mode 100644 index 0000000000000000000000000000000000000000..06928df692172ae54b8d3f1a90020cbece276241 GIT binary patch literal 43766 zcmdSBc~njR|37+~lcaerr+Lt%GSz-PcjlSQnUg7V=FCSzNQ#hzB1u9*Qv3PbM@R^X z5|Jc?R7gT{&->o>UF&{7zkBcce((3*wZ7liTKkW)*FO6Ud%qs%F}+UUFcnop$^S8I zB$qGdJdmkMB>!yPB!-g3OJ~nnGIig{@Fip=AspI6#G|%&J3HQcD024SuuJG{`VOPk+OS^+H<;;L7ks1k6h~;va98^$RN>#EYn2r(S#|`^ zFTBNePu+_RZ!(zf8FkKYO)WdtQjW5eX)IuFg8!;C6KLu`mh&5i%%ioH!J8xOk<^W} zChO8jJ2wjWvQbcecaa6Y+QM>rUSM8Ub@)JCg_0w9Va5#|kZzISygNn^de;Jj9(BVF zpWP@q=9Pc6-(cJm;z>c-`-I`jR`h${VVwNKgCwI}ge!V_RJ6YvW)FVDcB51^M#TD)9`kZ8qD6H$6OW1Yq+UE_L+8^XSoHFulV3u zT`O3hT#9y|Ole|Z4fa~N9=~=^V$Gc`(J9RVPOMI0k7Hh=Y}?xWq(%b@Oq{@)7v09% zNgG(bogvdY{e~r-Q=*Rd-D%sc+ZaB70K20017{Z7!$TDf*m%y9OrGe2S9E{2ONhrD 
zbFpg_^MrR5FYnP)=3dnaSlW*KL5D-DzX zc47`&ig5lL7npk_4mSmU!($FLcz(rQrd*xHmPZ_9hl4RKvd8*{a_r!Kyx5D*@U$;~TRRz;*>vLq7ytQ(o~SrbYJ zYkZLZj5$W{VBcQ3K*WU4>{OH~1S-GB2|2-7;;13G1&(ID`ns}HhcdC{k;rDxwPI85 zoyOaDoM71#OXxA+3l37!f`D1;^KTu9!HP4tgatt{tmOPJVY=%BW*ivF=5&uo<9GGA z`BqojdO;uLV?X%^{yT`%-D=|W4>J9;{f8m0@Bg0=*UL@n4jHNUC+B{^4mq6J$eQN7zJSRgPstwBZtDwjYMh7X!@1+ z0M4dFljhcww0_cKvKaA>WM}`doMq2JzS0%E`qZ+RjQtQkH=ebwxR1G$Oks78>(J6B zhqwhLwAynkS(o0U;9FXx`>qYE?YH3Ec`0;ipc0??`8#I7$tY3oQLONLfA)0-=0XxZ%31E zS~K~b{RA3H&O9|20SSj->`XOYzS*BHZEz3|FX+VAYBkZq{3E+q&!_X^5ftb#w-(X=KrMA3d>ZR~8&!@sX;`uffj& z5j63*DnICRSAO-jX1Kc9mLJ}I5R5b|hj@o93iBOC(mjWTU2_UScd>!Eed|+*`(wq+ z`xXmf<9bt2X(Se%*#QIgsffvCN-TYZuGpNeD$W*vQ{LV6bjVG@7u8&$qRy`3f^J>J z%tmu?+LungDkjh`*+5*{ti~^MGvZs1_*3$@J=p6@Ez`6wA;-QZV#;kNUSbw2j0)($ zulMf4n@~4?i@q1XMt+QBM^^YKXzRn-9ZOl{iVJbTeg>-xnj+C+oZ+!A5ucr<_(njtCf;Dq^9T;woJLB z*nj#cm}eoQHG}Q=Lu*T@CMO-5FBj6bHeWs}CzU-fy#T&(D%9jR5lV)ShB}9>Z0Fz3 z{Q8TQd_b56bu_W!xhJ6jJ#(PfE_br2HR2_vJy@;N0m$F!AqE|MhXrR!A-=yE)V8jI z=C$GAb*mMH{+l7T+JPN!xkI1pt$5X)PU6U`KOt>$3;Yf>7d1aslSOxZe&BNpG5YKu z(zIr@M$iytLwa%OVF_8|t*|v}G`#p`Ao}(?M<(s2V&rlIke<9MIGb0oXD7GN z9(BdBfnC9LttW*StOXxkJAT+J8~)a&Vp?$M4~&q#Aj!#4Ayz?I*1dWR0qZJRonbc0 z4e!#whdB51wr~DJ5V!Du5R21X@d?cbrLnIrDcEzf89X0;n1uzjbK})4A@Qg^HND=) zSy<}9%OqEdo8<&6%`8~e-1{s@HUvX;zjDc)Kl=x%MREN?&Z240p%^$!jota>3S-Sp zsoCA44E(J|E!Tl{$ zrhxfz`I>iQgq-DHS-_x){>eYw@JEmX4cr&aq(<80+hG8Xu=JvjpU*P6Zkq7%6QS4d zi`=7QyKsU{B+Kk>g?i<#5PGmHTFw}PC1zcjr{iASHsvyUm#)UplpXA~o;JwrZsm`0 zG^bw1*<87IJ{#P58yC1{39EKIFHEhR#bmGCIK57WaDQJazR-+k9`{eO+gj$Z-0CmO zwX$bXThfF%AGE=x$qCYm_OPFWPvhF+$GFJ-E(SQ9Li5HWtYV}eQ`J+VjJw55B}5k% zg#2c)2|AShcmZzp>k6?OO$5VsNA|w50z-Xm(Ddj)T>g18CXe_lMBR&FA799s=V}!y z|0S@nJ%%Lj+rX_Jb_qug`^%Q3oyLZ=Eo`XSLpCg+8+3nSM9F3jDErbEyWjDo?8(*a z)G`--rNb5eVdT@<@ z;lnzX(|%e|?2$$;I5Ey&rBMe0)#8PCJ4D_0;R5}ZvtQPs=pipftL=Aif&M-=OkxPi z)n#n%x8GQI@Bo_?Xa>0+$FX4C9rXGpaGh=T;18`T>|SF*vgmZKbY&w8oY76#;NOKR zWHHFHwqvxrA}Y=;uGjMPKj(>n#p8&|Ze zai)uXtf8=ynDXmmPOI~8oG?n;uM 
ze13_=4zB+bH}V+sgn4ZeQKzJdWtxSeta^EV+pr!OFsX~+RxlD-kqP*%$z>}FG{J4F zJAThMr^kUtP~QHR{TP#hzpS4ynfF!ytX2cOdd?9%{=2id zklSZr!=8HTt869a-X2ZPXZpkCCJRxzK}W3FR6_A*%*8UF>m<8&Us&J2oVwmQO`|SK z_yCi&7{BZf<<{O|)|I=#xvHG>{-n{MC2pe9EHi#g*vUR;ohTfgh`ZmW12USf_SNx zCjPYKx1Dnk^_8}QuWmJ+yJ{~^uek&%&H8-B$D?c|_`{8><#c9_9)CSri*JhOfcCtspiR4##ZsM2lAgQ7MXkGx(^}2MyR*w+)461* z7~}(k^hAL+zxkkNX3y#=@c-tJ7+!V0Q4R!P~g~hObK@p zgU-BRP4Uebmgz&~#lInMVKp>eD8b`3rhH=4ZCK~8Cx#FD!no2klyG_n6q#y?iyu9p z^q-w+r*RiPH|I6Zomc~T4{k&KSsU?peMkOqy{fn%b_*PClZwuEOTl&QXWG%?$(xYGrW0XW{l909=YQ&dNG=2|THA$cidmqwBt#dFnS&4S}G=i5?3RVYA2C3aq40=|A zVUL_3fVM&D#2;{NNLTU5c?WSx#TK}FB9r{9oO$mBTWHfHJuxKZCxy?tLYARcVx2`d z2<|qFiVv>RUCInP7ib|=|Hu$ z=rQXld6g~3`bbaFuFirFc-Mf}I{5NBnY$q1!6_^q_z(^@T!S`$7pSp6PhPj%sHJie zxm3NQ!d3NfLMH)=jPBF7EKUARtR+83b0=L|+*R~{tId}c9iq2oR($RZGYC>pkS!y1 zL_=kB(bLSbj+J{oQWn{&J@++!~8U*o`8sf^~ek7Tw&3!pw z%QwxPOH)qVqQ1r2ZM%d)?G~Wv^iWV6 zpNW#fOM>j`tGp+Mi*b*!0c4pqVM16cr>nY-O4@pl8K_;`3lA=-Wu{JAloezn1Ufm>N^^H+_{@UoBwWWTqh^%; z*5o9w;<)tIv*H2c{=n7FwrA%vF0k$AOsFk(1wJ2K!XA$`r{s#Atj_`;n)1^flv|#lkHax^G1e#h z_ZCb&D{&c%RrGS+MI9CqNa3c2<7LfO7Zf7Ls>0>2nN~_%gT5R=l5W4LPgVpjWrK!chG>=HlTFtqCOpzw$b+b5gLb3v-ywWm}5b zf0d=bC>HbwHnL^5PVmd?3v=IMNI&{~LCLe#g6Xzy^calkYkV2&&a}z0`&MROfH@AD(@ZfF&Yp1Yg{jxWGjcHU$h{E&^* zzRi{|*QX_896x zvjI%%Kz9{v`pNXK*mAT3T|aCFQ#&=|n9Hebf{~P*Whp4h*y~^CCuO-V2RX?gWt?tM z!aN76P~@_;EY--0rPVxS4g-9_>~&Y@m1K?5lRVe%_>dio_`#ZX_u;hUznJY}U6O=c z?frNDM>HK&$p!X(z>TTA$^N(|V5zN~CGFq9WDbpf9~P)W>%DkRH*p?&`u-`lv>j%) zt{#+hvIPs46U#eT$;L^2S(7@$(2U#M8+%*W`D`by3wg+*CZ=)k9KNxUd>dPH={SCV z{Rq2c9mM#TL)gp=Wss@t$nSF22G0CZr^71o=(M#H{7tq26C+hJ)=k3~%|r03v4qMu zr7-nz5p3^5RS+Wju;&G*88p_isU|JhQW%cfQ#;Y?JV!|QSuGq)jmKLFH~xjiwG3Ps z{GW!n|L!bq^PUc3g}u4>PFevMd3FBg=ROd&?FbXPRgm-NPt0(ruNbqxvpDaZvgl{m z0O}v_6IW&r@`z8ga?~zJH~)e&Y>A7Y- znEcpHL2t)Ek=J{C5dQ@HV*gO+?2b@v6OU!#_WYpt8vJCT2;Q|8!d%sQ7@4jqo?5n- z##`v|=9R6mAov1!+Z2IqK}RvH@e3ZbDx#B}qv?3n6Z$w}H$;x=Bz|hq=YzInVoKOG 
znqKB3zLdN0s@rEln%NzA(Mbh<{c#a>55>cz115aUkvQ0XBNwfi~4@K(CdzYsM??ki5;uq=r~)xO)`!~rXl4%F2}UPAHem~XBgNcgxt>7Q~qZ! z{=nu=e8d7PQPRK*@-Js-_^~Kx>7EBQQ@=nKe}aA0kVD$%CJ1g(fyQOd{Mlz_yiQ3F ztvIC27aT1BujR>X&=zYkwt%s|e;XB6@ICG;TS_x$sf#*^u~6`(5+cW&l051wHe2_k zwjP7&0i@9V@%p?_-4CL(qamiY1`--25U|=C(jL`<7@KUwblV_hFU(1N3ZYL}oXM28wyK7{xdcaWUA z#&Scp!g8SiepSDPX);@0qlcatnh*z6`+*+IRr!FRsW7={Bl-3DL7aL9{dly69$CMq z0G2J3B->H>{+qC;YbRcwxtX~XD}1-`ZTYUDWmLUm3erXgv1sa5G~Jd-gg&D8m`9Y&1@%cy*3IBv;pV8+T`V>LSbKbup0_L za@AZ5QR_oK$Q)d`@?#>Um#K?6xk(VXsU2@*nDRX~{)J2Ru{5dChgS_2>D^m}2dGg6 z+q*=A&07avcK2fbmPs~zO@|2h_d>PznYp+BLlC$0e-Mj{e{YDUe@8R(!L{sLsuLYs zb)H3^`G|``HnLH1?zG%L1b<~WVTwlz9$ldUacfWDa<2DN7C1CVwZN^!G0A6u&dgD5<$+^&vS->R!uBssVPaH*iw%T z&vC0~Cz!reWXk+879K6dJt~GE*VfC=anhz^gF^96vH=8t(_kB{hOh+Pd8qsL0PEXu zjXiy44ynQ@cBWaMlC9rzjva5a2hqpau-DqKOvjWmqXw|1YE@yb+8s<>(T@M5qf3(t zaaix$c-64fefJbj4HZ^NuWC396>#YH4iYupK5iurb9didmQ#35@ipW zYW5g-VyGa=zHm*EZ8MvZr?7aECtbAPhU(i6vIU)g;czou^qc&iwcSw1pw2J3vuUc3 zq~S;r$MdoDstE-&RB-Y{5lsWHbDuBQU`UfUEe(#tL5`ha(VF>8_Bzo2?=(9&-OCz0 zt!(JBs7?nXv?%D}4a{ApU}3?1Fpzu1ai2$FjbSAVHS}U>H&3y&S)Vb)D4R)!U-Zkm zeTW;Wtw@-?^k~0vH{U+2~3sL+JxsJiRM; zKY7otY*U8Y?V4~R%Z9>6eZ_Gz9a#FQkAj!$VARp6X1m-D;AmwF@J@B7lrA?h`O_>; zHvgc1meM3{)>ShY{K=ChJiE$%dK$y|N64x_r{x!zT2TEaZ`?lO6>D0jL3S5yq3n+) z%+It1`3?(eFIk1g6(5*gi75=un#AI#c;TQ5EqJu_2&UhOWt%I_;KYGUjKvzxtD80D zPPrxw`KSTb!b4`3bqeiS8NP7fF?Z}I-1Jm~Vh=Q77)#-%O*Dbj!`-p&-gZH#?Tq0U zCt={*qr&i~fw;iNiZY&-VSR};&NcqZb~`-4k?F}S(z*%TuRmatNAWCQ?-6nyrli~E zB0P$FjMd?4^m3;y)z06|?Dad*$G>}7#o6AtPxmHvi+6`5o!&C%oKu3zJzp@*yv>>3 zQc+yHPSn)$Tlnp-0t0vFutyPTsO@b+--fl|5_X!^y2P;Hk&}f@ZYNnxmv+`^zBy!- zUBT#ljujm~g;Qs>pg~Lo>fSBGBa`fDft@e;u4%xP@jfidqDpu$)ry9C%20MFn@gKs zh7#f-W!;*LHFxW=a!fv2uH1)gMvxKZZA z%l^|4_urkxjqTo%PyO+VoEta5_YV@j;mBQhVr)tcSJcF*x2A%0z(Zyetw^)B4QF*{ zI*7yPEQf|KuRvaH49Al4A-Ks+lz((&4d)KRs#GI>Y_l(4!ER7k)CKBWdjc*^OQJ%< zQuw~QBfs@x4Rsi1!^4H`kXQEra+R-Q`;cES|9)q__t{Ro#Ah;>`EVEbb#fDHYjj}m zDNiwZpAz}0$)UW<0nn8W!d-9kL3iIfRuJ`@QvVpk-S%;mAdO`5@@!#rp%ri2RtCA= 
z?JO#BE0lX1ijwRrg3X}>dh;!rbk%gI>3IuC1p>leNu%dD9oIkGDM-IB|o^va$eJyKm%+3v_!58VOg)1N?iszg*i z^%m4E|A5>sh#Q|}CAwKylAA09W(A=o6Tz{1)<8I*DmE zO>nbeG8MNt(rKkrWO+vebe()yh~a9|H@^&{9$E6sZpDWFR{YV{&(Z=IN1e z?Sio=S+q_FmRO2+72k7RNDFGOe?{ADt;DM2B7*lfK>9g_g~jc{-N82e(B#=P^v4cZ z@Y!0N+G0k7$_#nkt_EVllQh;lO@)uuk+D`qBVqA8T~STr2=#ba1FtTY(7KuRG$~r~ zUhg-9Y)M@H+Dlid?nfoQ`&J68bz5P{=5Mt0yrUR~w`hucwa(S!f@Ebirz5l8UfkkEVIK2luAK8fh{Cmo zr*f?e67W=+EBGiGP3 zD$JYcRv&5+kKoXnA;IIF`XHzl03$7>-V5je?C zFIE-#8)XM;@+TKOXKmpbIN*sLXzjbhVtWi0OhP>%-F6EnIXm5dc!nd3ZD>KEqcyd? z3t`p^+F17@RTy~Y39Ac{qPT23N_XuM-VaiS4a#4UGm&7h`BN^iYyvx?i1gNJNAkV#Vgb4N^Orip*Jrm!LC^-BvYr+R}tL7f&4e1P(Aw)wqm zZE*Iq^$O~hfMZ;qp-;aOh2iKyrc)}pt>X=0)DTP1F`S7Bsx6%O&J@1ac+p>#KR70& zjCu6!NHXOZ|08SFLABQ&OvsBBR=3CD$f!BE{j!`jH5+m3?#fwp6$$_^*jK;1XnU=W zbyl{5R`WpClx9!suJ2$g$GYPp={z<)M2qe|S&XuPy)5|faUrdu0N?#-#L+hDKpiyb z!LkCDwy=_UeT-xc&mJ?$%Mdo^<|WM9U1Fg?6YHwzPVjbX#ltYKLHjwc2qM&&xO-PL~!2W7ZtmIk|9`Wr$ zrdo5D&W=;e&~rYH>g5T&Lao`fEzXdz_l)qi!$CA{W_YcyC&(SL^GEhGqKNRfSoCm* z5H!Y`n!9(TnGZTDGPoctnZBRB>-CaJMxM!&{kZQpAVwE1T33m|IgOEWG1FeUtXF!1mK%F_0K%n9BcOSu?7@O z`oXMvIzvF)Bmdcnz8E|ug6p_Rh6S3ftjnyUX!zBRj!IO(SSCZ=UG8Y=%bQds;D z3jcT*Blj6od6y!X?5QQzO(-J4(Sg?Xci@MH?t-R|tMTrK*KoqQ7_Lr!L5Wt)P;vh; zY39qY^Xjg=dh;KWsBGavHEcz;I+0?OZ&$s2~{oy5MJNJI@a!|eU}>H zZ5IpPC!nj?W6pbUGjs$t&n(iZ3#|%ZutBm;S_n+LcH1a8YTAp4U)C?;*TOl3(Ia0EYW)aeeU`4mowVv#r#op z@Zl@a_1nls_wB@YTBam=$L^>1s#^R8VJ4UcOQ`RdbFk#XVHhgjp+JjXbn~Vsh3Ke> z!$SAb5WlW`HX4YU1NY$AU`z2;Z%_V@f`hOl?))@WsaXH<58Vz^<*VhcqM5FiXg_x= zb(m|*r>}oQvHLt&^T%%Rbh(nac%cb@X88`f**caaa69iqo+YnS*GkSB#^Swe>U`bu zbIh=8EI3+S0OxVq6gqkXN%NO5m&Nw{xYhPBwud^;7apNsPMV_h)ne$qN?rW5Y8DkL z>4_J*ZKa@0Z3?*C#CF{*ptU7h{Ig{SJZn|rzs$7Ycg@a*r#CD_XQ$5e;)w^eq?AII zs~P+L&_&E1b)K4>TN&4E10NPwQqkK%(0Pz1U$)v_3~cEP50Wa#c)&G^@25w#87+G4M$I$G za_&NJ26PhT*LH&W>qPi)L5-ia-cnrB`6-;zaNr})SdiC|A1ol%gsW(}#)=<*A^Dyn zPI|{pjO^c^V)r`WxSTEE>95b%Umgu+7nOML$sd4@KZPz4E~5Org%GyBgk`MQO48-A z@NJGE-^1}F)%J>@yY?1h$jFb1*Lg;E_MQ0ZGZtvR@FT5W<0IN%^`>d5U3j;7R#Z6o zFz96E((5m`Xs5R>KY08W>aBH%)r>MPfIb7g6 
zikHXhKocL- zbv-e=DjS5AqhS1i>%`ue@FoX!c%KL_{(`N$IB%>CKSt#oB=cvuUolM-(eQ=rTW8Wc z@f&3Ja}cxb)WyC-gQ%_DhXM;6nWSk-o=1fM=frNjm%SMV$y}h*^$udwCNmxO2?EzUd`bSK@1J_i{_>T`SLD+_N zRuprMO240hvIr^P>5L=)=Bp>)p)isp)31_s(j&!ORO6HFYB}BIPsuiG2k_VTk)!54 z7&-b73=$1_Nn-r=4=+i2q6epwVCvCa}4t({}T&}I3DsYxA zqW9C2U@cukv85(V`e`+cG`j=^s;XlBnvqmgS&QGay!dt(W%2t%b$-lTiC8$CsK@!E zu=atbnBeS2tCt-E_qJ~QlGBXQ%04Df*DlbSFNt!A2}lxYJX$jiCMX3DSv*2HOr-pt+Mj*!)ozCqC5W+oHxm z-J~t}SEj@d*SkZLyEnjg4Mh{0J)pZYbj7#6DO4Tafi{GVB8y9%_)w=uWEs~&WlEZS zN2N>(d0Gjx^Sr2T+bvf8VFgyLR}*b4Q)zf*cW6s>7xykrBT26SF6GT>3cukYwjPaz zeW&+=)XiCeWpiPOr6SQ;Q9$cHnv0wC55ZSeCy{oA(v}Cl{Pax+U_zhc;HvtUI)88! z*Y+{x&&4~7v2K;PYD+1}=2viy<2vx)s76~DU8)+ zEK&In_R>4UNrc+o#g95+YW#9`>$efu4m^im*_zCIiW{wg&Tzi>R@Q6LOH@+iKshIE zsAlSEwy55YUS&E_?4_~nQDa9?)^VljJ~=E%|E`dbEM+Q{u3);_m6e~0W#%svaovS| z?1X$P`Mhbo6Na>)4NjnhjL~--~wt{HM6>hi?PFYN1DAd3ni+> zTw#wG_E*{s1}MzlZ5QmJ#Iqls;*Md!;;-C@E9Q_~GmXno6rrWQChYTiJNVUm8Lpdp zj!8Yg3KPB`Wp}mfS?gj)R^Mq9i!Hh#^il~x`vZMg#({S5N;N5b9u6gJ(Uja8Sf7RIG-X4A`GvfyO~Lbt(=AbAk#_eixEElgCv(NV@UG_0Xj z9L)~&*8si8Y7o)+4+igu%Wu^>hxKW3Z17S8bTo{|#3KipY9DJ{ekYB^Y3#(0Mhds= z$!LYTekcU?Y!d3LJ=nD+s*pSU4?g|n0!1-HFkRJ($>%R;qnGYS`x;YlJuV@jE8Lr;dAuusb8(RFJpGafS^rJ-`+@)2wHxw!*KY`$|kqdu{j!MboJ z(*dexy<}Hz-@t(EU~WTC3zEN8+^bXWVesU}{OuD@uySZ%TAMxT#-m;M@R}`!?=-;Y z8(uS$*jfxZLQGy9?_Xz~&s`j=M$4r~nD@OC?1zCi#B_9}RcEc>$#e@^lJ=e*ym}nX z?A4%7vX?vB+Z}2zM=<3{W$a`J&3{pECpQ&#|2OsaA3?YO?%FL(dl-D)5ko&@N7;MsooLED8_33TLl_cIp7r8=VH z@*>cEoXARKt`vJCliJ%dDQr&-ax3iU-pS6q>Ou=@e%wH#uAirYPLE-1>LuF$rK4CE zJpb1S3y(H$(WdIBpQw%O$pUkneLoGs?D$lX>eD{*;ok|M|<#XhnlGMPCM)P z+KZ12+6ga5x%1UGcXET>?xSB|G$mh+!rlW7_{Us6{T`H08pkbox9!6qcSr{@r^9Qi zt~Ua$zUlNr`7eDwovo;$KqFfgaPKFJ z_E(1Fp>DuU<3Xd?h2Ioe0o|Ut@*4V@ybs!mPv5xn<*VMq7S$glIjAQ}^5$}~{hNiE zv+enlVquOGjVug=s{#BNJIRnz$jxfpn4d8ryCiI%0i@qZ~#JhUdV$i@; z7Pe7C+z<(%+Ci~WtmQEqJ;XxXbhsL{SIq$VhYPfOk}dzFb~n5{(TV50c*-#UMg2Q? 
zg6!n&{5=lt{M~pJQSSbLMK)@RnzbrWdg(97`hP&L>(ekzvVbyDF2R(cPwB6L7TvO+M@NhXoat62b&sOpzGV$6gEBxPE7klnP(SLq`E!DMk$M?>4#X;;)&R+)>TZ7 z@)ea^?*qj;h_dOga5C+JaT89_#Sk5F>ajTx+V+Pn+7%Dv_6KtMAdJXW_!tpqsa3B( zoDVP%v$Jx@_J@!7AZZ^&r2mCUf|mHPv>o)HmP4Y-dC+*QXwY0+1<^`9Kubdeg5Z`5=uXnlNA$Wd^&5>tatskl|w%eR>)MH;-Z4^RpnWj0jlJGK(a=eVkY>C@4^!xx$hQkmm-78?U=>-&VB}| zZH)wGBDmeqqFnEz?A`2KkO_(|wy+)7?(Zt@*0B=DYTApp2jtM|X>x^ziYB@9BCJ{Z z5G>qGc^fkWzI&QAKX~IP%6bupg7Q$3Wxw*@>;IeDo_mT5toD=qNJsACNqb%`RE>Xg z`Vie|br373yYd-#kAPoQDG0YZ!7=Z{bnN+FXjxfKvx?(LQf0u!{#B-|8@q9Xstes3 zRZPF9If>63=aX|cZ~FP%L3B8hPP#H(Mc$Pv1g!EQX-OJ4!K{@|xZQvm`_=exb4~cz zPf5(b|B!sSIP$ZR!Ad0)v73tuZ#DEP$%pl!&&5vsOjiTGV8}|k@t~u4`iqU&K~;l) z-=r;SxB7zrMP*)i=1cZFKU3zkzElxD5vI=%p}UP%;`2QV$ZWi#?R3V9Ik#+qs^r&j zd8D^kzCVvj;wM4>y$eA5yoY!zYCmn7p(*NgenhRYgQ&Ar3k4?6qFb+`=w1C&81=0) ze`4-T#r$5G@2RaO_DYI_pf^k5@NRF>{Cxsk8?VGG@4E~Qmv2CNz7buXJCJrL=DqSX z0s3U+(co{RY1lATF|cbFQf==F#y>B^;E~2qurUQ*|9wPlL!D{zDLc{r#X;&c(ucpb z!iJZ8ndjdcIT?%=>4;8Gx`~Oyo`C$t8cw(sR$jCN>1=S6l*-m)BZ;^J|(zCDv9eiyWt z-D9q%=TZ9jjj-g>HckOSY z+h+^LRb6M>U;bhNL%y@YTBA@x2Km}t9+SwA`6Vs$W#?biviPxHEb6T)yS(Zg%aX1@ zne%Q;NOeNlpjGTw^?8(xoX>`am7$k`1@>={W1y-zll(|zrmkC1Qqx@sUDJ;RLuMouT60~Ho;;g>@a|3~ebYyfANiIa&^0|ji@n996^^XM@){;T3&fC9ZFs(5C+a@7 z;(RlQv#F*P7|&0{nAO$ztL{5{p)`(3#vI~w9Fti3hb-nB5QwhhPq8MQF^bQVjMWo2 zvtZA~n76u=wT(_-L0hA6gR=pnEGKn$UMb2W*}C7YEn%wW=e4Z?&TH*nF$JuJC6h~@6^WW|XO znB|ugbhi2?B)p2|Jkq}~U8DUhp!Z1(1Ak68(he2tc~SnZ3)=rt!7=tfP=2~tSnZd> zqE{p_`MJ6In!$Tfwa)=Ieav^39O}dc_w0tT*8*5%v@IUI{sPtAej%Lxi_0|*;B1E- z*pPgPWi8d?hX0<$vJUF4-`>ix#-OAVN_=oFJ5f8sy;dtdxI=RLOd zOl8WgS24`@7T!ExiHT=&nJoLJFfR21yE#EoJ3HAA&+mT5Oh=8wh@|T*sP|JLK)<6< zrH<(KrxiE3d}9kjelln-MZ+$SF}g62Rg@lLxrdGm66-v_l}83)_{84WYWzV^K2U_F zJFa7^#$Rr5ukM&!ZX>MQyOV87%0S;!%}jgxZD#C~qR5)fSo9qOO#d0gytYl`0ymJGzhl;T23M|?vD&32?8UicrnPWC8-8p7Yg^xs zjVq4FeQ)=o?0th!_iQaoet(jaEOo{gIh$DSoNwF<8j4|YN4VnJ&Mf}2BUZdTqfqCq z-0UNp(8IF;Qzy-40V%tLikCZ4b?sElDm2Aj&ocx`tNe1^a5bGu*E&C5d8K2;34 zL9F%LEiUp?I#%b`3er2=)QBfnWdQyGi=o63r3 
z#o`j_BMcsx$?YmQ#9j<~g2F>(Om2OKMPz^zlyY&dJL1%>k6B6CGjvvM!KSi$L32tG zH|}==%ZN>2mrWJ8IA1Y$zSsNfjQNAy#~L17yOaJ!a4Old)imO<5k}Vl@7z1`6<2-u)#k!2C7`S0C=M^jCH1lq7$&vwFK(u*& z#oR|+qUjElbyM)wh__tJlk-fLCUW87L8xxNA5-hMvZ*%`FkrhjH~Z;e)crDuiyCK- zH$BdCZn-P;=GHtZD6 zUho)e^K8*=axZ4uyiIsw^$Ja|RtZ@?GGUzCYUW*i4DVVWXLBF4vbJR|7-n*adAMw6 ztw)z&Npd9`{x!ts2X0~D`q4s^at*e%$e3H!PL%jM;ot=hsQxP*14eB1uUNU8ZNHR< zEA_{sZkvdi7Aa^veLwaetj^kY-)04mp0I!;tNi7oOt9J}T+lU%#=r$9neDAz%ry6y zAk)~J*EC@|T0148+_hMUvHQWQZ}k&G-FFMQ(1(p2^$sh#Mze8^Ls@y(e0FR}6ZR_D z%;u&=U~q{)mvCqgH}2pa9GG-~`5kIRosb07FO6V!^h2LB7-}wN0zBNO1 z<1h@I`wkKkUXk5?&a9|8?v~_~968kH!KbAx1v%XoTp{a%q6xvme5{YNGJ4?mrh)vA z>5YQB!-D^K%#z>v$$=l=Zq2(oOoS2N(_qVMz+%fD)JyDz_Wo^jL_#BKOnprzYe}EI=)@-|dPk>|7ykfY9rYt4AhdESAS<@f|N-z}bcbKR~5j(0k<3l}@ED*N4_f1Nq+TJxHw%^Z#m z+i$}ulO{|!rcJI!nZlwLbzbb{iBnfwvubHyAnj8=S>q_|lb))u$4}d`UJ_|uefvJV zwcVU;+*O5=?gNG67$x4EO7P^iR=6 zz_&LcFC&OPIOf8So@LJl9GOZsG}!W-j5an{T>&2JwL>O5xiQ8FtRKeZW|pNt;iouO2e_$Nod<>#V=fG$%=fP zZ~DA_h;94YVP2F9%S|rASBg@6$ci&ey<#=}P-@5~&$LF%3EI4`$WGEpuF9YAD%LZJ@nFvEoe~Z-}~*ui8Y=7nBUINYW4fK`t~1yTWk!p(Jg% zA@MD9BBJ|_jNSMc&Okh!$OaMOA@!4L`x$^=?=6xvd>YAHvzEr@2#yL2(Qq5f>`zUh=apkgm)cFb#ma?*n$Mz_lkKj?!sE2xi+GK}4K zo7tjmPwk2y(xyyh6g~J+P#<7R^9F};Our%O$PK`}$q%`&ldU07%K)dZ9|#re+{t`Z zH#IS3F!t6GE?Xopr8oUU*d+?9wH3m)pY6>AR`qaGy_Te+qN* za~?Tzs+H`1Qb$*38KUHD2@@J0Kx{^?qZiVCQ4zC-JF(vY9B4Tiam5AK&cX<;j-5q& z7g>|(evYsueHfAW5$5jDIzsn)z?{D}5z(%{$uK)*2u+M7Yu{)=ztEZ({QS-s`RpUl zip@cEf_RG>vu`aPYz3KLmeACa2r8;Jy_I}jlU5uKU>y1u(uuR{Nb=DN?#jtzDkqrl z0#|<_p-aDURZYK%QCSRmbk72d`X6$7UC-%mBR8zcJWfSU0VGW-pX+^{O;3EgN5!IL zBsAqTlg10~rP6OC(>R-+JtdHJ21}!7p*B{m+C@ZbSHF{H_e zbay=ABrjy>i7Q&L;3KB{HVl9(kZb@)J&5MG*s6oFr*Ru|6gxZ8Y+c zgoN0AV>X?W!tcBb4iFB28@9P&sOcMl1vwD9a+NVm=?L*{5iwyO^XUFuC-51mgx>kZ zq}wKp1X+wHsd1LHyWk?z9o5EMtgwc_Z~7>)+gzA)H;tqP#!=smD+?d!W|O(ojWFBJ z5xdvvl9tUk>8QOHbV{W%))(&bVN5D%b)^%%?$l2TitCBL<}xa~cN7gRtmRrVRM2J4 zAW)dTkUn=9PC~96=Wf;OqlI)B9pvu}MFD<3%bs1L^S2$L14jzar8zQ@9@ZHCb^tEO 
zHOGl>ACWmFbIE%j2kdst;xbN}VWf05U6GMZE!+0edhg{#>Wl)of&=L-nMdxY$zY}> zM}pt>(6muw$>a(8|0r=WwHF@zmrLCL^e!$s=?oQT&BeUcy ztb6RiOS@da04YQMnXEo*XywjIcl08^H52vB)p?OWi(}p>@F8BeiD!vCPBj)ho!(mb z_k2%YJ;0canz0I3p6tQa*_%Li|6kDYwH~r2Pl0s@6nQ3pF^s$O9BY?;M)w8Z(OXKL zCl?L)=p{FB8=nUE@7wan6J6O*Gi`o|XDNg)QiEjjnrsWIg#Pj?s1xW3N9MnRtXF_? zsg8V2x-;*xUzUw)S_5{Y?~%a3Ur;hi_}+tVeDv-c^s&1NzaV!YUv>RDDkMH91NF36 zkNKMHLlqA`Xp1L^EVB#8j&|Tp-ZkK-m}+d7pGst53`E3f@X-g8Fhc(bzCs=(UvDs? zkFhj0#vBGVyYl5P2gBHeQ&hNPL0Y;DhTc%-A6+nGPhZ}JfqUM--d*}^o?0axXQIca zEiEBAdPY3Ui1{O7m!PR95wr@^A^e3UIkI;bG(Y#l_!-|YBbBGJHkz3CdoxYQnnKq) zy0CZr2C)O}ta!W65h!|;fF>RxaKJYjm*(%rce8Kdj!13xXP`9iY>*jqR7YGD#9;EW!S*D-H>?0giTnih&!e!v63CHacBD}P|AA& z;T7Jrti+BlxRVd{D<)B~jWJB{uwqx0=VG9K7e0JCh#m3uCFor2#mJO1IJavMfBw&T z@Ga0Lb2m7#pRn~p!{n)261JG1qn(V#Wv1DFS?vg1!L z#CgXu(R_+Ce?qGWHzrH)?#Mzsn4JzL-imns$5ZqkSq+^l_1W`Zcfq1PR{Y~ZS)f&Q z5{}Gmf{+K+bVryqe=M7amlaa%?RT1N+t&h^nJmqE%Xsi9F}kd8z8qH+^@>sO?Iuki z$FHd@1=YnS>?)f$XnuT(Tr>CNGnO5of&L#cVCF`w&k+lo&86@q!H73l|BDp;j$z6g z62VUIGg0wZ=l$L$V1dhZnDOTl5=TqcbC)9?jEI5IJzHUi;O;7O35Mram*8E&sT7>| z4&Lx;{Pn$p`tYa|JR4@kuFE%P3+q(aXpe!oHBX(rWhq>XerBOVnH>M`9WWbR?AHCK zO5DE^Q?QyOWLz>K__AJdrjz)sjtKbLfc6MdaR4PmF7Fq2anET&l$v8dQIV8NcHciH?jTTfZ2? 
zhJuHn)=Y(~g{H$6(^!Dhw;b&T!dd%+N`rjjlH5X=K@CYIjPWbnm&$dE1#{ z(;c3qs9Hcm$Teo9{%vZdc!;dI`i82zT&4jQ>zGZ8G%k8MQATvvIMH(Ru52dBSH9~{bcYP@g z-{L~sn>KK5jg4T)f4VX73msDIc{awYN<4Tu^}hZKnM z;NEPK=Jtn+i2O`5H~b=ZpS~v9vQDs3`3(L0el1<8(L!g%W)rVJ4y0}0Z&G*XGP(Uz zFbp|A;j$i8Q4=L!^5Feb`rE(+bvL`h)OAuYWWocIl(&!+HF_1sPkKtE^UqSx3L}~s zcZTY(HiO$iCfJ-5$7y7IrW*r|pz4MKmUS#7&KKX1>#B|9;9O6fx55}g>&(gJ+-D@W z_7?p(?E)>Uw8H1Hi-=9YNAgHJi;A`z7i_zhLN*_HO^^Fofr{!rs=d+-zmE|}DuWYA zcWnlfI^Bk(4O_#N-dsc)7dR5HcsJ6Ye~OfRIYxqIdxeE<9Z^#heAxaSBx{E%mIdD9 z-sb3HH{V6UM~o3>-|4w8H|VkNFG*Z3OAA({Qj;CpRAe>dR+?oPsgu+bS7t859?^oU zM^q6z;>f7tJ*4dKAGEQ_n4DQ@0lq7YaE_uH+0g2O745IMcZ+Sn^WzFKcEL}k<-Qfp zFR_L3jmmiWNe!8BT%hIbujZOPO>s=UEIzW=fH`YclfP%`;Vu(f^tdw!EDAo6$4PED z?}sZ?;|ESsao%Uz_KCEse=2pA%;0p7X+!v>@mzvNA9H=?N#ZLIh;jtVw4V48Y0j0R zK@av4*YWPuJlhCnJl6+F#X8P)*dgxQLlxNKwt$|~bcRm~1JOvSf@FN_<%-_l@G)5R znsZJlqInAqnDD$_PTc;haO7TXTp26K$%560*XYgE^_x4v6|UHwlE~R9Utqj@ev{Co zD5mT$LoB$!6Ftrlk6B7+TKY_y*DsiMoBFwbU*f8DrltLtOWgnTF3w-73Yk$0A^PA6 z3YRKjr|2gbtSlvEU60Xca5>t_+Or2-?%=S?Zv4|&2R_y50>JZ8kdUG4Z-I8Uh&!{w#ihKPFv*hgLoK^mmS+$%;a|%Z;Q$NuF?T zyKn$&#AZP!>gAR|QiwVKLc^2~I6s44mDz-&)(dPk{aMg6vIFx@M0Z)?6FpQw~7aG z@j0f+O^a`9&IBdR6i_m9;IpRa;@tx`An;5+6ioaEtJ*3cU!w`H@ATvksb55exH6i= zheP{gSxo9MMZd_G(EPd&^PIbB^hrDH>(uAvGj!O$OqJOq{sZ~%?N+RbsvMA2XHaeU z0pYe^hmC!Mz)7JAO=7)aYnnBybNDfKk2!|NOXS#uujfcox-3|YsKu4rMXb|LAAGPg z4T_>4&>`vPAYk7ddj4Pm1Pwn&Tw8-6rbml48NU&q{n24fBkQqxMi_*PO?g+B)l?&Y zxIk7`VQn^L2>#?N9fzBMnOhI z9Gs|BWBuNK1nx%+RCws(o9XK8s9qnCH@%0eQJYon*JBk%dO_&7!MKg-5OhJa$PTsy z|4cZ8z10gKeC21nE>`0Y*(>m(oOo_*RW|w2bO(Kgn6VL?gJ4GUU8LVl*ra?l__D-+ zCl&(Dxx<>BHM|e(SIM*0t;4bNxFPQydLAB?M5ErtdI-5)Nshgh;VYi)$B6?D!Ru}d zR&*hZ*3xb~li|XyPrU+*Zp}cu$jh{4-x1IlGaRkv*zrZv)^i~*vgy;_Hf&R<#^>7y zq5k{V7`>~7RwaIfxLfA%%hQt$oAw*t>07bIH5#mF*mxhkTOW}snDgeXX1u6tb>ZYd z6Mm+H8LJn03KG3F*uQ^jv8&G}KVB=ppJh#~qpN!FD^FH{%&6`8PY1ZFRTlxr9`@$YG>aBMb`?;BkOtKUy@GxsH9 zW%?Jb_p-(iLs{l{p%tX)@20P0c#`Iq$&4B{loolHF+qFeg^uYI6SCkYx3K*&B3=(l?3m~At~c*AY zC-H?8h|bz7+Ym|8A4cRmk&aP!hlqd! 
z#Au5Q`Uh%Z-uN!2?41cdALm52&oRa{<-9`C-{zR~>MP9*{6zAV4d8lHBR&3PC%uu| zN{_^Kkbs}I^q|dsQnbdUkVflZl>b0#*XfEEy$l4QvLo%wy>!V5bN-lP zcSa(Usql@I`BQGSU``MnPV@GNEvC7}=V=KZAzhEnXvN+@y5c}6alhO`E?E>&v5x`G z^E<_@pq|v`#uxJY=q}o~?hy5TIggvBBZ~&7-RQF+Jl&K3l2GqKP+KiSPOfbu^;_=I zLA^?7)FVQL;4CixmnZl}Oy67xt4be1X7qOv{6zDF& z{4vUMu(0p^Q@CH%5UO%c2(pb&^pLI{mWW@_1ntF~)i!~?^I|uVy5b7r_fMI?Q~ktD zb~nj%yGr&Sv4P0;vl^4ephC zSA(&5E6<8A`*et2@z>?M=F34fc}n#peIS-)NPC1QeG#X^YFH12D`WK7V3#NyIm(@F zv>pIj%B8dC0e7B)p#3*58ndL z&vHQGXA2hvRc`X4^Ymm-G1dpa<;ID0*x4$!Y}dFr6t@qBF_Sb{yZwb|lldI?gvH|8 zM<(owVNwVVccHRJP;tJE219QTo{0s>(vri~!ZlcyACA&~pP*#GMMw*?1krsR8t&6c z7JU4M7aUHZ>zWX{dbldzkmtl(iUzZibw7P}s+YnpHFbXSJRROu@FI;^{}MVhb@ScirgBti{DnWtVxT0`BAzc%l^9fJJq-O?1oGY(XF&odzeFPmFA(Y{7BepV{$Hy~nfGuyv-u!Ttw5yLH zirG%=QpE>&`1~N2FLPu!x-{UFfmhI2SDFnmFy$90U4?a}((L_28CE1)>Ma=*2Xf4P z=!!Gu_0Nd-&4r%4Y?oV}eJ34Z1=zo=38o9`;E93dut0MVZ}z+fK3T_Nm&tS-SZcz4D|?PN)vuCw zZkBv@w3z)mEeji0cjL^+gnx8#{>}qp{!=CHUx_I=THOlz^#u5c)iSD4Y>!PNACto# z9CgUZB?dpPQ1R3oOys>bCQvYw{Fy2Zo7;9#sXPzN8CO7)&G&PzHXFGK0WtJXR|55N zvZdX}Ce!0ANPGq={zp$0A$+$;+qBNnRY&-YG zNCxuyG&onInM7jhSm?V!T-dBVl+0T8k=9NhLEoro0PkrAcG$?Ime^5u;|?<2*Awh3 zzSE4FcU0q39t{qzB=sV7#z4t}DqP~2!?)!jcZnIg4VQ)ooGE^LC5K~W^0^J)tx?<@ zOH3w?CuOonxYLn#@Tf9|o=aB7ga=Ms=TBwmKCM7j4L?JIee?xuq6R~$8AR`lC)3)7 z2x(&#obQ8FE?m`%p4|{Z?mAu;jL2C) zZ4BG+kYvah(&CCi(EeaHnPT&TJm`K*E=R=^oqP}aO}I`<&C~_WOD2$VSR1#SSJIU2 zZZIxU3y!R5AkEe)q&?&jH>dXsZ3(R-!$uB-(&1a^`A`wcM%mFNFCLS}@fB3T&IY}X z=Tq#NPDOty;;hge#A1^+F4^jWen+y%&~jru@kFqGeo|nzeKAChPHmhX?Fh&3CehBr z3SjI^Y2w+p^tVbXg=5#rPZ>|-W-le8sO8MIWqo89rqdsP?;=6IQw725eJ<~@I_L1^ z6;YV%N*8q35HFimB=GP>Vi()R_+%fT>%497x3LiNqe3^1J+B(L8Eda z?Qx7u`kX+SN*g@>N*ji<@ifHZIo)LakW_dc=WaZ+1@YhsWauq@EN>q~Z2kJb5x%Xo&1e#n4``& zW@@0gWIB9Hdx^_ch19G^R_I^86gKqMp}dJIYxeye3_C0A!FEV+&NnySci9{wsqo|e zT6hxct^8@<9A~~^j~y?0^qFbhzYZfB&LdvRg}=2DF<3{N-#Wm9J-^4CALlZVl}Hcf z+{bUilskItkvpYu;=ZuQ_`ZdRwrAtmfUD&0E)kz``VwtxI05`sGq!b#9N+i!6{f9z z$?ZryhtuNQ@SeIQAC`X%eP^ykEs!FlPG=IbDPtxp^4Yd$6TAFW(_yIG2y0V{ZZTZ|yhmn~a 
zkHLLbeCK|FQLxB_&XJbp6^Bze)?-+6PMW_vwF%@!R_xy8Dy+7J276<-HEVTB9p)@n5?Es!5FEcj zpiCLe+w=^1jb@bObo#95w-MMo gG6;=(pD4Yg@z|}?vcNw^_(zY6Wwa$1*m?@;b zZJiA3g*t@v-(Z?fynyYFLK5H2e!@L_k2Vm)=-smh0SDe_YbORy@d7=k8hLG5ZKX!gk>HE})I(oqTb zG%9iI`BIvHT7?y_X`;hxOxbzvw=j){(GQ*Wtb4g5zxa$UzwxyNuj8MMSL9{cqJ5ee zY;g|dGS7gT(-++4bQ}f-E3s!aW`jdW6XfhW3JN?&0yZk}<<288PSA~hIz52BQfke+ zX`I8rsDn_(OF_EVZ#+EJoiE&{#Fy=hC%$36n6OWluJO4A*FVR?W8Yf*cw#p$YTSjQ z&&z$XQ|L`w1mjgcp(zy8IZX$a=9GQ}_^I=N1K&{&kBa3qGCL-}}kvCNZdNx=xeFzhnwl zHIw${;moO{x9G|@XK9g?5>Xza2Q8_W$XNQFhTB?E6=yL~bu)%wolE3_b3gI?Qcb-b zj?wI98Sr_VK)?3Y(kb`v5UVM2r2b|zmo}up=W@*nGG6N&9q4g^zB?cbb}m1-B&}t% zsYZxOtGUgH4$68zj5LNCIqm;-24lRowJ^o&Y@jgE3cRPb)BAT4Xq?~=Nxk}xUYz@p zUYT7)f2gz&@9cWIwap!N9&^Wo^}C6IbsYCgKpL0!y5PyPH|f$vn$UP{0`a+{4eGIG zaN?*du(bwI_C$x4Jby>J`__}*E$ZY6s$=;;U-H`b8g*z1rVo>KAaD72E>&(d-4|>I zlIsQDUAB9P2%?60?hPeb12&OwcW=>?7QqXiFkP$Ky?08>86 zLxz?Rm&T1JQO|tomxE?-a^XQDyZjH0rct!<^#I~)6khn?Jx7Yx2XXzj1+<$h=YHJ! zL=+S!(wI^9=H=^rsH->MEnLQa#L}=MrY%rd%2*xk%DzHs`-hh4|W8P+f&b zwEliPIg4`zLa`ta)w@bYiiFSDlo9mwO?Ples-+P%an!Iz8NR>q1c~0w!bw?z0MAm7 z6y)cT^on9CX2<;FoLZ(I`sDxU+x*|wxBt+;{df8{{~MTs*T1>&6@gXM?M*pa7%aeX za;og}?%TL2MT-|T9OFVB2w7!@hFC6;V9eG(!y@C~TM7|(BW;bNV6YnEZA3ZU9bU^SnWPD_EldWY+LmaCc6AV-|u09*W@ma zTN{PZZ(iWibQyO2#X9H|lyP|uUoqy3h(EG79diCOKmfJHan0KNaHkV^*=IWb8Rfu+ z4R`{JQ*8M8Hy!!LZ2@SkAtY3M*oi|_ba~m3DD0T-%)86IM)eULsBjcu?1&Vs7M{B< z`%yUamNefU8b{9zmf>SHUSi;oH?Xe$D%#DINBy-LY;#I1wMq$triffv-loac=t;35 z=PH@0Zx4_5Dbu3#s0bh|(d0W1A64Vn|(An#KxwPaOUU5P6jQ=`tV3hIG&WnEJA=rprN z$XQdotj&r_d1kx5H6OOO5q$shWs(OhfCkM%$lSXR`~RvG6mDra!sjx?D;C0LD-Twm z2z{vSTe4>9TNv&n&#qV_4GgaZnKL%vyiHE5hVo%}Fzg<#uR8%{=dMxTHEJOHNDJRA zuwj|CDt!I-vrM|962CTAosD^VUtm8dkcsAR;rByB)?l3uEI(??HifLl=#{(4oRL1z zfBO)YzE$8`wVq(ek0NqpP`@C$(`Gfx?b!Mk#Y|deA0wzIc{go}HiZIP@30y#DP6FIeiJ-}EVCbXTX3TFarijFo=qqsC<(aBwJovX<9&Kz;%N>? 
zoZE!;mYT%1-I3lcu7<5A&tarpB1km8FmpF(vwr8UgVy!WV0$1J6BZ9AZ53ZpGQgik z2-$G&-deLOdj7`ky4rl4nKbT_b7pS_y(WQ{$Kd8Up@(SXAR{%Ih7Nv5>ehY4t)4A7 zGR2733#x|XmHTMyYGISPUyJv>p~(0qo6?g)I&$;EN}`dvh@Ol&g31$ASvAw|P~TBQ z>rNeqg2w$QI;K;2a?d63>NAEjOX4wQ#XF2G)8s#%cHjfTSSqs5@oBGJ0?zkp;LiwU zUKR{tSgsvg^s|zwnQ6!0Ry1L|ix1O=O%#uf>;g04mh1lF5YAP6f_V#OQ2*3nsIIve zopy}Gr1?`I{>4KunjD6k@8n{$<9;GJu^4ksc4J=YKt4gQhK3{##JpAdWXax22tB-y z+~yV7rxCXyGqWAlW);J1ojs`1se<-hGOSm~IJhi)t@?QmH90v0=1hHo&F>e(GFM~v z=xtfPXj>gqB;`f^`Ygq|c0ZxM%W`O}zcxQ}Nj>fodi@_`&*6fCckowT5sC!fNWI$# z2)_DMIA%6Lr1M&^*<#P1PsoRG-`f~AD-)~p`e>WSBajqqX2$>e4$rta)HoPPUwaN< zeFYz+O2kZXDZh$i>o(w)lNQkDtIB>ou>%t<=Mn20F6>=>S>APt5^M_k3GJQ>Y0@`! ze%r^L_;}vm1UkRQE@^HXjGgkCHnyWAw1neLw zUWqM)>RleVEh`SHja{JrUJd^ z=qF*2=0)oJ17Q3hM?NMc9^>?^crX{(Kg#ZGP!5Y7A2k4^G9V z{K@C}5IF80tXyoz{s_1NGE-&wX_rQ#+@e#k$$K|!y{p3JmszoS_2)?Xp#897rWv1J zYrv0*RAyJ0XrpAqP$qB4Nj#Zz4DKno@-Bm|*!2Z}(0+{I7rL#;$ec7|r%jZ_^LKI} zX|cfKA8*XB&AE<}Abpy=qY*o7vf-!u0ABB7FVwf_k@>ZTtWjSZOserTxNC&#?kY{YJ+5s{=0;=DPHbMQ};6yGIVUJGh#_knzRhAS_2>7lCA zP1*WowTz9xu9&myFqj{9Vl8yPz^h5sVBTxWM>icMu5T}4NO>~O7P1&mx2ds@pLM{X z7z;Mn+83VZdDQM`sXO{c;I)`b1he+@kC2k_bV zlJLMzQ}()=CO^Vlo*)19(Ld(5(KBuS|1JD~plGojHQkfUXSb4;sf2j2e=!{@uME*=9f(}eFLK?di+mpYl3Ij)pmi3)bZ1pZW{$Rk z*nu+mW4RPQF|~&uM=q0mVG0+eEx1)$VoUW?9bu>R9#Wbxo%(ie_i4|mVTNTZfXRS3 z+HPY`BU1(l&fd?|S4Pa#%(nuCr=8s7d)KH*dOV%`wv#qK98X(iCec?xXGmIZ4Lx!? znJ8673;ez0g4Hh+VN`)DIdDO+OSO733cItJ?9xIZelm%}wGlK&H;Xn+?LeYj$EjT{fl1Hdb6j6I~EsHN!0p6g}a}) ziUxBcdfn`EJ*S0m9}Kb4s+&$fsD`hcb4g@wACngp!pM7d(XkE-Xx=z^+9O*;Dh>}K zZKo

RClvZ@Getx;lYYpDkn(&CBTmM|Dh>HO6?qT{P%vH@9_w0zBSYOJ#2jCG)eL zVN*yYF|5>sAkUwS$rE4t!c_>a*^V@0m=PVY$OW73eIhTOIzV2_d2Zzo7j!VT#!b;? zxJxFD78bc;v9mGWggeZX{Z=?++I{li_(Rg`wuJoJrU4a;R**UK%upg1!>NsUL1zk` zUQuZevop&Ab2Rr5?TO});`@?r_R#~4U4)7!?JN|(sPmb3SOwkm6d+_3$6Wbrjd^Zr zq+cd# ziY~YOSTTj)ayUB13P#Btr7f}MsKWbDS*)T#+J_hg!@p?67Ihpw>Kbjh(Mk3we5NWM zTIe6vPR#~4(8$gV&UBhSaiVtcDvqlYuKqs~ zVT5rzv`n<)O=Oqjw7kD?wSygdLV}ps{Sw<|e~0AZVqRt9N^o7-PEN4;eEZJ7F=zS> z47xNHjLOqt#>P(6SQ7y0yAeXx^m6f52O+*R3wjT0!k_uNd`N3AQ_^q;R$PomuYEIN zwShbBDH#6yg(z>x2)UC(Q8uSFR1U zvvt@~ccG$I$}+hl9ob3OrTHz>2kT{vb^Z3EA`ytz#ovZW4lahF=*;M7#^a}Zd|C! zzT7QlJ()6WZ_^-($-*)H`9aiM{{cb@9+Js2!foTiL6|0F<*aNU$bJdDiF-|)Sl5dq zQ1nfUyEWaM9X?BnH5$}`2_>?8eUGqT@V6xj`g1`}Nabr=*$b8D3|YF#nUzEtaWm(8 zvaZ9{)13v~VB0Rk9?6JDyB=#)6J}bkKW9kuH3O`_e2i%ib`nUl+tFP}V|*UH3f2^q zfF$%cb8R7qWm#*$H)=DHa2?#4tbwe-mqy~6rOc}y6Xw5{T7v52C`JxBOk01;gD6Oz zvuG)W*z76@KdOli+nV9o4jVRipA0X)KLeU3WrJsQ2cDeY0{jahWBueq7_vx$qFPTM z>qK>S?U_ETU*5xLE2^_%ZH|f~Uy}v%4&mo~7q(}zDIfgSi0^+@4jl(JWB$)G=%=s2 zd#$@on~xr$tH$kvjQlZ>Hf;f!mHP$Swcg>Bb;f*=bQ+2;Y4e(CQtYVTmgqT84eK86 zfc53iu>GYU>0auI1s;0^P3{qTcWg0?iPYmu-imm$G2I}(A3*BXB!h@raIq6!WBR8i znDD`weOYbE8;jN0qL|6d=3rO8|JYZUJw{MwR1Bfjx_!iEdn8`}@e#gkv*8bJ?tl*c zN?4Qh3UbaJM8E4zxJ{!GF24JUmY;O^<_Htm7pTk5(=y=KeNM!zANFwlcP-4kWWav( zRA;+J#NepahVbHrBkL#i30)V-k*2T<0?jxIUWL`bn&2OJBg2~YWY^-T$unT{?HcSE z@(p}P3l^09J5hY$GiSQz0v65wirp=S*lVuEhi|=0c2}FS_H|-b?EH!B2)c|FfjLC( zMiC@6?1n?XJor}sSZEvd2^4w{Qd?_Rwj=i|e76*M@DBM*!Y3U_TW(04)SV${Za$fJ z$Ciap_Iys}J}5Cyg(;U$f!Kf{lE=l&&8h2f*Iju&HvR&5`97p-YwBU8jEKLy%8Yks z;z8na%Exq=1gF?5^8ppcRO0@zu>O(~hBRL$8QC>Np_{=!A~!bSfC!?W%A+mo%I0wd zlHIe&xL>=_Jv|s}Ee&Ak+w<7&Vn%xh=t82Pv)5X2224F$anZzL9Ij-_daez^E8k>! 
zr*Xj`BW=mny|U%YwMG0y(F{EEL4jXpavvowJGpEl1vbn5D0=Pq3slbdV)h6jx57NA z@RX7rJNLXJE1r=hynz%@dwD>&uYBpH-Wnm;>b?@WX8%4T8NXzh*-nX z#%#&Td(hwi2nuq)L9lEwCcLtzE7pqmG?_^_b(STYb;b$uHnSM5T!fw_T6{M%9LBv+ zX8oPFg2_)Ctl#vVyZRyvV-FmI$gL^lcvTyA@0|{dYoajWtOr)^lV$5VyWydu1uL+* zFxRCSlMl{Dzob54mirACqFveJxpJ)Eieun9^Ak;fCot7tH;{nG6G__gV7PI)0DBCQ zQ1|#CKH%g!+FND9PpRpEmN!SBNDxV;`!xz$e-Y101cvJ>4L-qb7?B+s1T9gIAlh1s zzqekUw^^77lb?C8S0_k8m*-|2CvZJdx2xld_QB};Lxz8+smG?t|STN?;zE zgwjoO(;;S~246bM10)Z+ae1YnwvrnFvL>Av8rX#9&gZb=xjs+V7_hAw=1|1PFi)pH zfg92`Y{~dN(5IltiUcmBv{@&+J;KPGw(I?wP ztoOBVAi|3@IcyLv4i;wbM0ehjm*yurw_kr>T zd8rQ={k(+M>j-9S$wN~AMh>Fgld!zdgb&{=1qr);(rVSyq}9;{^&S>O=sX3;yWL3s zz17Ecx{K|~ zzv3=AKRAx&nF%((-|<|6^e--=m!%Q2Ey<7Od!%F5TcYu21IKKRAWBQFkfO@>oMeC( zDWAKT)Zb8{0Xep`SJj`ChWw=M;d{syvo4bJK7nS85{UL1)4T$%)sO5)%+sKdlBbcJIy$yk4Ywl1DeO9gq9XzhoB zvT4mUZMg;6@?DXPjjv!tpO*=ciAqj1^^mtodp#}MUd$=X^(WJka%sq$`=mSLJkbz2 z6W8jCoUhD%E`E3lZH}xb;J=qH$+N3A`o*Ok8bHF&F^uHGFYi#< zIaIP*RS?k!6xucK=dwQDCEXWPh*xks6}c&SitsL8 zI-o@}_E*!QIt$X;8AJ?pMN~XSaF`u@U$EkP3gOzH&;?TtkhBphqL#M0;-4k!>L_ zNS^i^ns%b67%wcqGgXc&O(#y(d-~PO%I5{g$DXKteJSb|0Eh0)w$%% z8%$dEZLW5-6_u+KGSKA9IR62aByztw%{(Fetz}OXh;G|4We*-O3d8G&RgX2z`#pu3 zqE<{Y7S5(tdY#05-U0ITOBXF4=S?3hO`(z>vpE&hFJ#l*=d?&+3GGfvrA1?e+qF%O zV6*TnOokwm)-i

nrM~Ydzzs&o~kVPn`k5yFr{P~IcvCsY7E@N zFiJkexAOt1m3$-9ddsMa>P>3b;7UUE4>4s^Lg}>i>!|3yxzF_@ee|I24U$3RXxoTB zQh(+Y4cTeSWDLutEnjyNl~)loWMLTL?uki-Z#$8d|4ITvP?&~=n`4z0bM^Wt(tda_ zZ5;lSy1#lt5+;8oB`=GK@0u@k&#!ut))Pw(KIvSqIqwh(Wj(@ic7EhRK2@Pw+xq|7@t8#O>!WOlXuf2E>SeC?0n&-d3S_cE+;St zpE31*ZG|D#uL@l!nb1&eplAC!scS_b7cn-6hQCnfVik8$_pwzp_-YJwk`AF3voncT zeh$;*)cXH4cdkKETv;3+2nN9s2Sp`3bU=ti9vL4fNZ;F_EMY}gO4OR*3ydVZR0;`5 zB*}n?f{0T>l-D9FMtP|5fr!kU+e3k%u0d8;7!?qaAikn2;USE=ZDmt6J&xn#+xApV zSI>uE{XcY{xqZ(${{x~GBhb};6TT6YLXG4mSc=EP$)_7p@3wQ`VNpeSxA=Lj_h#cu zeJvy@9s}>e3M80U0XeJ>jZ0Jy>vs7Oa~a5d85M5KS2%? zPXqNl0GbPZQ1Iq%5dF288oj9k{KId;vzISWVB-?1dQlo!L|g#NnnEhGG639wf%5(e zxKeTnlzsCdHsTCg%uhtSmgZBPztSi&OC;KA07`5(PQ zWwu@hqvm}ea%qN5d0DKFZMv+7^|&7n%>~KCaH>?8MtL8Q$SkAV(Ao`6Fn0z6qD#}| zlAy83_l6ve?oc3M?kRa^K^2754#`~Z9YdKdE+}VVHI(KZL*AmRvN?1D_~jph?!941 zCHfIOt|dTeZJj*&Ng`S+0EA7tq560qn#A5uO8FDHcxN3YD#`(?{lm!PTN#q_(;;5gRfC==m@S(=X}68RoF5=XY=XK{>|XzwGBHKvX%O^Z0$nTxFKOX+!FwPqsAXaBq zkdU1MW3qUG}<@ zRluW@%}3LD3A5-itxj}C%URZy2a)UNrqc2I`fxOHWW>j7Nz?K+JWERO?#5BHpW`ah zENVwXR~&JCe=_+?-6SUPdOe)}5^&IR5iR*{f_%z!TPAtLjONR18E-IW|8IR|l{*$; zzwt%H?w7xl1x43zFFV~Nq(Fk&8CRMdyM$GPqsWokt5|x)8$>}xsC67WThYBAJ0De& z^^c}98`|cRTc5Y%gG;+e=s!ou?IZ_!jbslgtXf1GUSGg9P3Fv4W-7Dy9Q*F9a%1+) zFlLI^SYschG0nHn#K?6jeP7j1#Pwm_S(=P=_Tk@I=gB7HJ_QBMyrg)yhPC@pUjxu`45@$v+`~pZ)OId>rm5t-xig zr_m2U7;!e@gl=;s|zarm^IkTUTW9-`uKuA@DEzP<_PSUZyvF9Dr9 zB^%YHn$X)5M0E3&Jg6K1a^c^sAEabDQFCOQorPsny~e+VSr8dg67d=T0JjbFcq? 
zNDM5ezlDEK4Y8(l?iAoZ$UOF$8gkZ$txGp|0-ritIY%q|ow;*O)!gY<>f|cV_mg#L z=1!haC+95CO4e9Y`M4#7MSK+==e8*(QByV7^;w-A7oZPYmu{}(sycglpaxqX|5Z)t zT(?7Y{Ja%<;&rL#I*qC0t%7yn^^J{OmmhWf%e8vqb*bk%Y^dWqztDl#H#Ty++tu-P z;dc6`J{Q>|0 literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/lora/test_model.py b/onnxruntime/test/testdata/lora/test_model.py new file mode 100644 index 0000000000000..61cad8d131155 --- /dev/null +++ b/onnxruntime/test/testdata/lora/test_model.py @@ -0,0 +1,50 @@ +import onnx +import numpy as np +import onnxruntime as ort + +# original input_X and its associated weight +input_X = onnx.helper.make_tensor_value_info("input_X", onnx.TensorProto.FLOAT, [4, 4]) + +# Original weight +weight_X = np.array([1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16]).reshape(4, 4).astype(np.float32) + +# create a tensor proto for matmul weight +lora_param_a = np.zeros([4, 0], dtype=np.float32) +tensor = onnx.helper.make_tensor("lora_param_a", onnx.TensorProto.FLOAT, [4, 0], lora_param_a.flatten()) +# create a tensor value_info for matmul weight input +tensor_input = onnx.helper.make_tensor_value_info("lora_param_a", onnx.TensorProto.FLOAT, [4, "dim"]) + +# create a matmul node for lora_param_a +node = onnx.helper.make_node("MatMul", ["input1", "lora_param_a"], ["output"]) + +# create a graph +graph = onnx.helper.make_graph( + [node], + "test", + [onnx.helper.make_tensor_value_info("input1", onnx.TensorProto.FLOAT, [1, 4]), tensor_input], + [onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, "dim"])], + [tensor] +) + +# create a model +model = onnx.helper.make_model(graph) + + +session = ort.InferenceSession(model.SerializeToString(), providers=["CUDAExecutionProvider"]) + +inputs = { + "input1": np.random.randn(1, 4).astype(np.float32), + "weight": np.random.randn(4, 3).astype(np.float32) +} + +outputs = session.run(None, inputs) +print(outputs) + +inputs = { + "input1": np.random.randn(1, 4).astype(np.float32), + # "weight": np.random.randn(4, 
3).astype(np.float32) +} + +outputs = session.run(None, inputs) +print(outputs) From 7c9bdba91fc6388c5290d99426c42b0a3c0f5972 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 12 Sep 2024 16:26:22 -0700 Subject: [PATCH 45/84] Add convertion script. Add test model and adapter --- onnxruntime/core/session/onnxruntime_c_api.cc | 4 +- onnxruntime/lora/lora_adapters.cc | 2 +- .../python/convert_npz_to_onnx_adapter.py | 47 ++++++ onnxruntime/python/onnxruntime_pybind_lora.cc | 18 ++- .../python/onnxruntime_pybind_state.cc | 17 +- .../test/python/onnxruntime_test_python.py | 2 +- .../testdata/lora/sample_weights.onnx_adapter | Bin 0 -> 37672 bytes onnxruntime/test/testdata/lora/test_model.py | 50 ------ .../testdata/lora/two_params_lora_model.onnx | Bin 0 -> 462 bytes .../lora/two_params_lora_model.onnx_adapter | Bin 0 -> 224 bytes .../testdata/lora/two_params_lora_model.py | 150 ++++++++++++++++++ 11 files changed, 226 insertions(+), 64 deletions(-) create mode 100644 onnxruntime/python/convert_npz_to_onnx_adapter.py create mode 100644 onnxruntime/test/testdata/lora/sample_weights.onnx_adapter delete mode 100644 onnxruntime/test/testdata/lora/test_model.py create mode 100644 onnxruntime/test/testdata/lora/two_params_lora_model.onnx create mode 100644 onnxruntime/test/testdata/lora/two_params_lora_model.onnx_adapter create mode 100644 onnxruntime/test/testdata/lora/two_params_lora_model.py diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index f6fca38b8750a..42c342356af88 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2839,8 +2839,6 @@ ORT_API(void, OrtApis::ReleaseEnv, OrtEnv* value) { } DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Value, OrtValue) -void _stdcall OrtApis::ReleaseRunOptions(OrtRunOptions* value) noexcept { - delete reinterpret_cast(value); -} +DEFINE_RELEASE_ORT_OBJECT_FUNCTION(RunOptions, OrtRunOptions) 
DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(ModelMetadata, ::onnxruntime::ModelMetadata) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index c517f5dbe1055..6c8d2630f0d1b 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -69,7 +69,7 @@ size_t LoraAdapter::GetBufferSize() const { } // namespace lora } // namespace onnxruntime -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* /* allocator */, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN auto lora_adapter = std::make_unique(); diff --git a/onnxruntime/python/convert_npz_to_onnx_adapter.py b/onnxruntime/python/convert_npz_to_onnx_adapter.py new file mode 100644 index 0000000000000..ba9f3aa8802f4 --- /dev/null +++ b/onnxruntime/python/convert_npz_to_onnx_adapter.py @@ -0,0 +1,47 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +# This script helps converting .npz files to .onnx_adapter files + +import argparse +import json +import numpy as np +import onnxruntime as ort +import os + +import sys + +def get_args() -> argparse: + parser = argparse.ArgumentParser() + parser.add_argument("--npz_file_path", type=str, required=True) + parser.add_argument("--output_file_path", type=str, required=True) + parser.add_argument("--adapter_version", type=int, required=True) + parser.add_argument("--model_version", type=int, required=True) + return parser.parse_args() + + +def export_lora_parameters(npz_file_path : os.PathLike, + adapter_version: int, model_version: int, + output_file_path : os.PathLike): + '''The function converts lora parameters in npz to onnx_adapter format + ''' + adapter_format = ort.AdapterFormat() + adapter_format.set_adapter_version(adapter_version) + adapter_format.set_model_version(model_version) + name_to_ort_value = {} + with np.load(npz_file_path) as data: + for name, np_arr in data.items(): + ort_value = ort.OrtValue.ortvalue_from_numpy(np_arr) + name_to_ort_value[name] = ort_value + + adapter_format.set_parameters(name_to_ort_value) + adapter_format.export_adapter(output_file_path) + +def main() -> int: + args = get_args() + export_lora_parameters(args.npz_file_path, args.adapter_version, + args.model_version, args.output_file_path) + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index a98b8cb38a36a..41647e8ec3034 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -89,10 +89,11 @@ void addAdapterFormatMethods(pybind11::module& m) { R"pbdoc("Enables user to read/write adapter version stored in the file")pbdoc") .def( "export_adapter", - [](const PyAdapterFormatReaderWriter* reader_writer, const std::string& file_name) { - std::ofstream file(file_name, std::ios::binary); + [](const 
PyAdapterFormatReaderWriter* reader_writer, const std::wstring& path) { + std::filesystem::path file_path(path); + std::ofstream file(file_path, std::ios::binary); if (file.fail()) { - ORT_THROW("Failed to open file:", file_name, " for writing."); + ORT_THROW("Failed to open file:", file_path, " for writing."); } adapters::utils::AdapterFormatBuilder format_builder; @@ -111,19 +112,19 @@ void addAdapterFormatMethods(pybind11::module& m) { auto format_span = format_builder.FinishWithSpan(reader_writer->adapter_version_, reader_writer->model_version_); if (file.write(reinterpret_cast(format_span.data()), format_span.size()).fail()) { - ORT_THROW("Failed to write :", std::to_string(format_span.size()), " bytes to ", file_name); + ORT_THROW("Failed to write :", std::to_string(format_span.size()), " bytes to ", file_path); } if (file.flush().fail()) { - ORT_THROW("Failed to flush :", file_name, " on close"); + ORT_THROW("Failed to flush :", file_path, " on close"); } }, R"pbdoc("Save adapter parameters into a onnxruntime adapter file format.)pbdoc") .def_static( - "read_adapter", [](const std::string& file_name) -> std::unique_ptr { + "read_adapter", [](const std::wstring& file_path) -> std::unique_ptr { lora::LoraAdapter lora_adapter; - lora_adapter.Load(file_name); + lora_adapter.Load(file_path); auto [begin, end] = lora_adapter.GetParamIterators(); py::dict params; @@ -143,7 +144,8 @@ void addAdapterFormatMethods(pybind11::module& m) { py::class_ lora_adapter_binding(m, "LoraAdapter"); lora_adapter_binding.def(py::init()) - .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, + R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc 
b/onnxruntime/python/onnxruntime_pybind_state.cc index 175f10baeb88c..7cf790e7631ae 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -68,6 +68,14 @@ namespace onnxruntime { namespace onnxruntime { namespace python { +template +void print_span(std::ostream& os, gsl::span span) { + for (auto v : span) { + os << v << ' '; + } + os << std::endl; +} + namespace py = pybind11; using namespace onnxruntime; using namespace onnxruntime::logging; @@ -2018,18 +2026,23 @@ including arg name, arg type (contains both type and shape).)pbdoc") } feeds.reserve(total_entries); + std::cout << "Adapter inputs: " << std::endl; // Append necessary inputs for active adapters for (const auto* adapter : run_options->active_adapters_) { auto [begin, end] = adapter->GetParamIterators(); for (; begin != end; ++begin) { const auto& [name, param] = *begin; - feeds.insert(std::make_pair(name, param.GetDeviceOrMapped())); + std::cout << name << ':'; + print_span(std::cout, param.GetMapped().Get().DataAsSpan()); + feeds.insert(std::make_pair(name, param.GetMapped())); } } + std::cout << std::endl; } else { feeds.reserve(pyfeeds.size()); } + std::cout << "Normal inputs: " << std::endl; for (const auto& feed : pyfeeds) { // No need to process 'None's sent in by the user // to feed Optional inputs in the graph. 
@@ -2043,9 +2056,11 @@ including arg name, arg type (contains both type and shape).)pbdoc") } CreateGenericMLValue(px.second, GetAllocator(), feed.first, feed.second, &ml_value); ThrowIfPyErrOccured(); + std::cout << feed.first << ':'; print_span(std::cout, ml_value.Get().DataAsSpan()); feeds.insert(std::make_pair(feed.first, std::move(ml_value))); } } + std::cout << std::endl; std::vector fetches; fetches.reserve(output_names.size()); diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 1d7399d19e947..c9cb72c50749c 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1863,7 +1863,7 @@ def test_adater_export_read(self): actual_params = adapter_format_read.get_parameters() self.assertCountEqual(params, actual_params) for key, value in actual_params.items(): - self.assertTrue(key in params) + self.assertIn(key, params) expected_val = params.get(key) self.assertTrue(value.is_tensor()) self.assertEqual(expected_val.element_type(), value.element_type()) diff --git a/onnxruntime/test/testdata/lora/sample_weights.onnx_adapter b/onnxruntime/test/testdata/lora/sample_weights.onnx_adapter new file mode 100644 index 0000000000000000000000000000000000000000..99f5ab65b7de0cc1091c32b26162025c158b2da7 GIT binary patch literal 37672 zcmb4r30O_--|kXr(p)Na4 zsf3VZN|MBzN|IEPgtPzWoNqYidcSjhXIaO6QB1KM!x(nMgIN6=^YJGf)v z&RW~6z_%)$qz_aPrnR{Vl8#P(+hQ%Dd~G9)S)?pzc$h(qi#0eN7=*JZCv4YM6%y9F z3cX&~36bU3AZ-%Cr#CEw&_itygf`4Sy#Pz6R)K2_!oIsb1s|JPSSEcM~7c!4BNmiLTvFY`?tLn~AS4a+DX z11*I5?$_x5cMmw(_8amp*CUtL%{T2CfjVb?ptMR2`=y!*-gygo>GoTst=B*ycx5F$ z-ZLC!9T&XRQ`?~LnQt(#!Bhy?dxreli_ox6gI(RO#H6nji1Wo*)TkZ@-Y}H-ZtfsK zn~tH^`1`2wV-OUbSAwhAN^DTDoxmOIK_tnkwB`A3@=ZgV4Y?~~+zNy{6|ERk`v!Zb zsSDCD6QW?44(0)gFnIS{_@QPcguZiTE&fVG$6+FP+YJ}F&IX!wW(UkU>B?ewf}T^Z z;0h0G;Zn|F3^cg|VT<+PZH|^ucK0i7+;!6ABGx(SkjO`3YV*PVZP0B 
z(KhfHEI%t{-F_RO`1efM(eENkjWWsTp>ecHR~xERF2TjwRzhQL4!E7sW%*OJAaPza zT$^Gbv{rwHWmbFWj^35f$%jC^jU}`$D1}%P2R!4YBgC5@Cbz=Xgq`c9LRm=yz1l(X z_A_^3oUA`{=D@s zxq8v!Ag7@D8y2rw>*V6JGAP(-(dzZ9<^``=9q1Iadd>WK!<^PHUcPjh9JBaPIljo> zNJrlO9be@8*~VXJ$TC$@Jfe)(f*hJyBGBqlAIRutkw`-PL@m2g>ZF-KYJzI0Y@!M+ ze$!5u)Lx>ib|;YJyU8R%EF@jV*J)OJGoQ8p6isnB!si?wNkVLl>FiG;ox8k-dc66A zN?u(fk{HIvYetB&k0OmLcul!aJLuffE*emnnP+(+k?bzNPIT6EiqS!L$oWB0G|N1l zB>jjMV-&uNA>9HEnPo*f8t#(TnQw^B@AV?TDTJsmxkBsT#0{BqY?7RILNzUA>WC^D&mn42S2*Ml7Sj8aAZr z-Fy1%u0Vn<=8&*OPx+#~X{0zJo1dGUMJo#)(w6lSaz5`0sa*GrT=nT7oyp@!8NTLK zFL}~u2aiy7o$K_zQ67!^u$$UX_(X#oB1lT;0ot^9J}tsz%B_8$TRgRaCatg}o4=}& z(a~i*_i;HH{r<7YO*!mo+EPRFw-t!0bHFIC7RtG(dvd)td*$Dr%il50BHpzbdArMNN!rO@H0^*MP1#>T^Q$dM zQ=30A?#)rjC_i3y=$SZwSPm(qmiaM4)w6H3Z6T+|lhQ*ZD|>9@qHk=9hX z`W2C?6pQ0}J|Jr+SkVx}KDU%x8j64qA^WwpuB()Ca zMY}$PJM2niUeo!SlsZ~&_%}__8%s47{6%zPa`_@Mm7MLfo$B=4&GYJB#JlZ2sgivm zQ@e|)hGq`6dnYB!j1Kd~lb6w{q3bAj&%*0^+)sL__YIOllxg#@pQPr@DH^b&Kc6xr zlQw?dMKmfyXu!fiBHopdGVd0mr1F{g1|ZdNyGWAC_4upTN062ygJ|v0Z`8H&5s8`f znH0V#Al_>}(cRx`NK#iMIrOxRG)vNsN9lCj_BE{$wGGd}VshzZo#yN-6q~h~=8|U4o0nQ@nY&Cq^_gO>D;S(siQ&wwVahoJT7POb&gF`I_(F2 z_xdH>sFOl-WiP3*zaGu{ypl9*xO@#lByXY`nqzAB*6_1B2IX+90PFq@>DIwUT0>q|FZSD^XEXGu`SMC#x6h}bR8 zr4cS6L}p!`8zBj$G1gK1F-)QUvm3-=SE{L!mN!Y9F@RY2&Zd0ppR_D-D4DssmP*Hz z@v5sX(b3CZ(w3j!iSOOl8uWS}I~uKDp))_WPBBH7J*AC=`92UU@|i%RKWdq$=A`irtDnq3jdv5X4ld-gKTo>`!l1E}R_R{d)E2!xckz+rScGHV=fBQ{l&i_GWack(tfIYPPRtHgvj3xnZ>`DC1 znbhAdgxGJ-pw_*HP*2l$v}@Tfy6oFMI-yP^Q;!yq(K}RV7@0?7HjEcTcfBIzG0~)= zV>OW!eb4(BzwrKFl;Yoc#=jo^_xQ!of9Ds8TmP9~h#h+|Z9)tbs#&vkqfuBq`8d%! 
zbQQ-`I}7<82Vs~=3p7r!W2QbV|nX(GKi@p9y>Cl8?$(AK=qC3qi&cuC_Yoevw-EepsBUuH>Ayr35UnsfrS;>uu(dkLnZ663B)srznwcA$ry&~93V{!`|` z&;TQ0!$M8r#V(29&KF}#vkp;1N;KMa*`Sv4EA;q%hBRC=#+vM7e9K%%GIz!{bUmAa z&mvaAn%qK=E&IV=TPR|2+8Xc<-$Z0$t9T}@pJ4o{mPpfjG0kIAVebn)VXpI03>$oe zHvLcm&R<2eY%GGv^l}J3s*Cnp8{p}7TOo6wB9q*k4s{dL!9AiCPtI=y_B<6`Pdw!&p z`P&)vG16fk>u%A8qj7ZAn0=6Pc@!i~T|j1Leu5Ugw>Wv7y#7{5Ldhk4rkm7581cgj z-REgx^@HsYTJjWIUigsCrS6#Pwg-Y&#?d>Y3t&{3AuD{tG4oNKAi3vDs@KE=M=iz3 z@vkuXLmiBN?Eg^sJB!1IeDf@NoGt3&LAOw^RrWL(^65DNIc)g<@K5eyS zhc>lBtI=av6I%%x=MJII^*Y?DQwta0enzVg2CN~(6!!V`7Ut<0vvnU2Vp_)lxc;LG zW?V8B>fN=4_F;Q*#Oglq{JDeR)8hk57b%mvzzg7>5Dt}rFJaBx4!n_KBe)A|am1wQ zu<6!I>>B(9yhrMgn*BRaa^a(Bw)+C+&-#p=jeW4&LXQP+xkGkUmNWIqwkD`>G0Qu1C>h`=6k3#v9Xz z{Y1{fKkt;foiOLTgCLonMq{Thg!xa61v~4D)SDe3dp>G0-7l7`KEOek^1)nC8n_TA zj^czqBTa}o6J-6E@hTk_<1&OO{oxOd z;SV9gMvvVM)n>K}55lCUZo<{^J)qrv6OQStA|!6p#-Nr#=-r{n-s&0($pfB2KvEZ9 zW2Oz#x$4a0f->`7@KJmdufR^Gx-;25M@%rb68>JQDfHf^B>Y?ufd^MgSnm872>sXu zYmV#yNz+g`aoCB)Yzd%caW`qcvnaZq9ms0zDc{_74?0I0GR~%uHbaG75nS~#7q2bq0;Pc`+tZ#D_utJIHM7ZNq2Mxvr|8g6Fj_z~Mz2A2<*f&vh0254tidrobjRHDPduGTu|w5*E!?gQZW;Vyk*OUV3RR z)Ev2qDm~s~#Ir(LW6(vDWDiKqYh{RVjm45YQx?3b2gL0BMk_Q=lO_jeG<;A1%jT&< z)~#CdpY@A>;~D?r7k2XY5BkNr;D1yvIIes5KkK88S6>HyD6<6PigZD3{f2j0=h_m)Y+5bDa?$u5{j($Nc1K-nXOJ{Vlt|l`^T0>+%MeJD7 z10R_UfR4Cqa(R_}tWR2Sv#78?HA-}V9SXZiQOq>z-L}oECF3POBwZCudq&X~TMHVN z(39#t`AEGLCH%|z)}Z>NO`LT18Z}LhrgPr3(b@-NX_MkaTIqj=BxSy&ai`;ndU*tq zJ&w;^{ag)3i-mCmOZ<bv;sPoQ56x4(^Fb_oXF<4E z4PMtX8j1M657t_D(rJgZu+k}$gk}EZv-|^i6_0j0+I|7e8lytHl=4a0k%6T7)MJu( zR*lwJ1&QHT$J2_ldHg|(61u=a8GR7|3I z`BE%!GQk|U%}?HMjf1D&BlnL#Al)uY$oH)}P_`(D%${eCGUdIZ*0ASv#^b+feo+^{ zBh3;sbaxZ|2^Nsx{eo`tG6bESgi0pv$df#;_L_G@16>SNAz+orU-@W*SuR?nV%Snr z_vj_bU#>}m&MDG{5bxDWpeblKBNu3Lr=GBz>>X9RQ1|7FXqiSvs0=3Wd-uj?8>W(sVbSEq(hqda?nENlcg-um{F0ZB{wLn+ zrvyVilZfS%-DGTC4@}mrARnHz(bf|KP}Xppu3V~z4Sq^;A=VVz*C=Ch=pDi(^cGhf zE1>X08AtB5h7rm~X=9`XYA`RVgyqy<|1huG=T924SsO==xJKXIXd}B-Kg#QEJscO< zLd^%gqhW0+qS;g<$dV1=k4Th3KmQCp_Ogz|ytydq9GxL%@jG~rb&*tG)e(QqEF&3< 
z56j1{-egu%Ar0PXhUfh2>AS;7BQ%zfUANxRucMt{ovJ?M9y&;$xs0S*C=r zWl{9ZAVn6b^8)<_zlL=+SJ7^U3L34|5gHOAsdYji)P-ciie_EmrC|>t;9ME+E5E1q z^*0epx4*+*Klj3c$5ukghjEyu_X@8kT!8Gdzp=lh2b({`Rp8n_@ZY1aV^Z8fk{jKE zgWcw!s&X`O-7aOP4)tNOQ=#J7zb&zpzmCN#$K#8!v6#2olx6yu2xVXNNs*d>`3l!) zRjwtIS=;af)NGkkjTJlV-H67ildxH<5Oy3ZhPX&qX3!Xd9&M*6og9zL&uI(0E-SHB z!WEQlt)u@lsn@<=+J@y?T@Uho|{9rFTIsTVLRc z7{ATPh6V1a1@Aw-`S_j-;GJ$Br0&^=zy5p-a3TqZd1XViS{`h&b`y-qUCiqLhOAlo z28KGS2tjKUfMJw0W?i%%8qXkM2_3-rqTY+CI_usyr z<&z!M+1gBPVegZBm^4L|Ot5$ZKOXcEjMo{!ilhC7x`5Rfv2qugJ=_a^-8zg#Z&X>6 z-Xjd?$R}|Ff5D0G`hsqWolx_@TAts-o%=eDiyu_A8dv(Kv(;^N#w-IZsbcwXZ zf!-;ofGsD_VVK=Pkm-El=d9Nke9m15z3U&L|A9=5Sv-_9mwiTA&v7&acfs2?HbPL> zU%0KeK8rF}z@5rYLXQ6{;%9XXa?YJ1p0y&RB*xQagWi(rb@jN#y%C2e^ks(r6%fC2 zAB|jHi4FVpnD-4e-aFomo?Ltf8Wuh#I*E(u$-QydYrLkQW%dXzHz32SrD_Q{-F*qahxKAg&@wz{Jj{KOHG8Mx0G1^)@nY# zhX?udV-G>v`G|Ti&!Cax^x2FhHMnzL57sgI94^Rx3x8JUBRAp$t#KI!bFV&uW3%2t znA2LY-8_JukGTxN-nTGtW-3->{iMxq4?&i@i68s@D?AmWQ0Gt>edXR$@D6!JH9}^9 zbIDa4UA-PRpR|OZ-kQSa6WcMyY96t<;Vj%Sl6!(n)M2ClH)wHRNMpZfv#s?zuyptd zc(hxO5gU8q_YW6AqA;6wlRR9sXC6k*e+=#ivashKbAkIBA(ojA1YuwgrY__`#ZEWe znid5WCeBcE_a&{r{sYr_p(RLi&jAGGWS6)YT zfV^IDV0#myG0M=IfrTMsz2y8iL%`O6r_h|d2RJef{C~*Dn5E~zvA~R-e0CZ9#@vOK zi|vFC-z%UvMTt$lG#r%|oq~;?yI{*54dHUJwUAYFjwBD<4?#1`S#p&z8x__|2r|`2 z+4?{DtidPoWX3VLt14yAgRF(n+}}80l$F5UQsWg*nhR4WDB=0r84$a;1H;Ceu(cW2 zQRZ(%${<*(U8oq>Lc|1*$fk3x(S2#m*Uaf0xT~r!r_vL`C@G?<#CP(*!a1rqiTADK<>r3Qt?y z8E5Sy+ATH#Yncze`|%6>4^?5XgB#m_X#nH4@6YpmRR;mO-_g2zBHoXTzw+;xZe94UbyOB>`~f5WDorZ{YT96md34ypUE!Kle< z%wn)6D?7i9`b6x6&Up{0`=D=fj(Ue|e)SGNxb|boDN-hJ?xLF0%!HcdRlMz%(=dDI z5wJMoC|LIX1eFsjz@poXMbsT7(l?hdpd=n=xk`o8&00d~(^eR`*HXxOW5$-y9Q3qI zg}T8lU?a}}WMz%Pnz9j8b&nD9YS%EwYzkZuOofhL*T8*1PnLc+77y$&6RvCNvSAh~ zZ0zTU|A+tT{qOAJztl&U&JS7qj}?q~_@C>S`0(%IXUqN6VqGk)e4k8yRA@r;a5?tv zKTcB`q2wrbu=;xnRbn?C1 z#Co#aKgwwklLqH{WxqT@#_D~c{oF3lw+EEK&bdR3)mu*MULGeEFK_YOAtlcTVJ7fW zSsyC!J@2u#kuO+h3weIl;5ns*-n$(`qx@!&#H(-V#X0r#%B+0aq0vY@(`)FKW>@)m 
z?uv(Mb`fKnDDiumB@}f#$#ro{r3!5N@-GM!q-21xq$j{}y8!U~xixK^@V)>?27k zDa-rYOBX!ee-aaFv#5TLF50}Zoyhtg&Aq>!6502jnD9XbQuH#!0C6k{f9g#?9WsZL z3l9;c6~Ac&4X3rQdJ^xx!Fl)p7D;}nzxb3ZdYYq)w6 z^}>s)HQuE?E}4PbI(KmG-zfLXy7H2(&lBHw<>H#lUF5-^&Hye|L?WF)&iZ=Ll6+6% zmi?PvHGWQ8_7;m9eJ_zyn+j>);QlywWIOrQmQNJoH2&@jNeQqhN;6c*&RMDaV@F%F@*R;UMI@KUGa&W$4X#4 z{TB9>7W9(iUsC~9?e9mvdf0*6Vk!9Eo^r;zL+NIEaBAi?T^WNYjlnh;|T%|{AJ zV#HGN_1ShhE7Kh%-EgJp8pPYqlJ-`8NNeszle0L7+(IQdZ+Mjq z=j8X;u>VjEqmCM} zWs|;;^|Q5EZK@7R3a7!Bq!+kCvlkoj&>F{;EQR&m)u>{sDVTqK3q#JD39q)xaP}7$ z=DmD2k(K#~e=ahzYk1g}B>AaN?c?OZvK*aNE*xwC`1NXD7!}R@YVDkQgAu_0Fm3ZB6iv8E&y4;Ex%$-P|MU3v&E&Z|J{>H81|G~aiHG#);tZ{UZ7Zp(wSEAkOXD0`IUDte1Yo znr{VElT#KN`rM*pR~aztUdy1m=@`B|r@-z^sRI?xTG+EfL(sR>5pL|V5v)&X!|cWC z;B#?3g2NZ^Q!Ivg8=oT6X+T*2LaEuA>h{yzV5_x*yd1&vYcN;eEK&U6#W4KKOc}2y{y>x#kRt* zS*gHj>l4}HojBpU3Nw&%QoSoJP!B4z;-n@C9e?6(+a?Uf*w?@R)+?!H1aa4 z3maP>!(E-nIQo1My?k0jkgTbrLtmN+^IUIY5)Gv9+Xe`(B@S%ynci%}D@$fDE*-C^ zC<*!dbaAfbIaE$P16q!saI52S=r>ngIIA-Y>;vi`L!M7mWg_w2pvp>|hGEo*Ec|e~ zr*NgnhPmjRL%;AtP|SKjvd0fRGR2kU?NevP`=W_=pf|?sQ=)6UZo>7Ckx=Sgh4m+P z;iB4|$bDSlm7ZwFMp`D~{vJnh?79r-`1lDV7k3e*^TEj1zd)0g7W^YwNw}r2BP=O2 zW#Mw&@H=NNd=oi=JE+Zf3V*`YUNNxb`U$YpT8=db8t9cHYQg~SG=^O>gXd@au$=g% zn6&>V&y6YO-P(;=^WRT#gGV;z^A@o2)>BL?Uig3T7s>zTF9YS<|4VyayEtgcy!jy^ ztDIKWD?_!D^xP&1|N2}nfKGV zOMXvLfKAOiX^$*7%ovkP<1O}!QrivUc;CJBcY6%=aO_Vzk4>WY-q?U=dohhmPbZgF zI>F_ze&BTP8;S7_79Gh@S~ztJU6XQ`c!ldiS@AaUQ(r~M`l%yI`_3RTGlx9y^^&{> z{XfXe<@K~`+A#WBLkF0pIB%|T}@iaG;enpQ1+FkynIV_K4j6kb03o$PMbGY zx1_3vUn-SxwBluP{Z|{5G(-~931dmIQk-}?%nlwt-b>HL z_rjR_j$+%lUeI}3m8=?ihRpRc!k8;MJf-Fku_KyHZ5~XMewT^f_Y=inO%Hl@eF(YZ zkWEHEY!>5Bt|3L${ZI@TM7e9d$iqIGaAf^mYO5xa>F15Hi629QUd*BwJ5*r9uW%|= zKPXBf%Jb|$-XPn~Yhz>oGP0>d6^h!GDL2?veDl2zMrr6{;Q9w7MX?VpC>sbZ_h*sG zw$I7^&Qg*c9!(4`yU{OK^q|L#-k^KQ6cUc;<2H-OG+~agg5SFQC)p{_mv#KLllXg2A@NH0#H>Jpc%K1v2`%A*4uG;!etOX%8afJy$7#Z;}k^xnj9n!0rb z4SB9lt_10WPhA&%>d*%aSKg*$s5X}RZlGCp_O$x(cfu|5x~aL}1yu*!6T>&|qLu^C z(;gGv(?JRSKqbE)`0fqlpDw>i8;<1hNzY=aZ>WqM>RUn-+>VinA7d#0*cOjh>cbyG 
zG!3wPMmO3#AZ6~y#T!rigJjTn^2bdhENK}?a|&JY^ePib++t6C`u!j^=kvS*4{PHt zFAh^q#*6;l@qBiEJ~h4TKqKSg$b@pFYm#MTzTEpdaKi>-DjP%&G4J9LD@eRhK?_G- zC3XdCiOlM=SQv1M{6*c-a{oi}X^IW_>NZljR_zS3&m%~{0tdeFHKXHh=;1%{m;W}t{0DvEs*?U0Uv|4BL&ua~ zm~?)h_+rK>tf=kzgDcClO~B@v3o-vw4>q>fF}%7`l}X&{ zsr!%(*tFab*Niu03%1_??To#s)%Pa&{CEUWJ@!N5j}Z_cDIrm29FrQ4#+zBztoXxW zdSzU1);?eEJ&`BWQ1%lfN<3)^ai`CtGz1-+Kj6wJLwOb=9EXo^6>4pIf?c>j^xcyV z`8lKc$-@dj;`58YJmM5y3|1DRhnM50j@x+GI0`jv%%G?-9DJv|C$;al;@Nj*f@&}BXs3X|z&qLePXSh2s64yR76@rHJK(N09 zkGpy@k2ev}$J34RkpO9W%D8%>rrg0CiV8j-ps?pfND8z9?w$b+_G%*w>c8Lx`%@@g z6F^rF)nxCo9GMk2NRX}j=Cwn+2zF{|vq|#|nA9tZ3=4e$t-1y*`zV5X|8l^#Vtk@| z7uHc-!Md&lHEO?ONAX@L476wCf0+s=Hkk=~C(E_N0cCW4g(WK#FGKItfkMhuBMkRc zXJ@Am!?G$9(sjs%1=}&A9`+9NtNWAtm2V()|4-bj=OQHTo)1RtE5GH6j4ie6qc&p$0O$l z3ar>c*x>vQC-=L8CcWi34`Va7K>aGLD^d{d9aI!Jr6Nz+z$j4W??HQ%2{Sswu}yjI z%<7S)kh4CQjAmWrQ@5fJ6R1k^69*IDEr+OAzvsB#aXaR}*Aiu0&XV`8IzoHq1PnNP z740*$*ul}R!jEWU;mSNMp|b7*I4%2y8$~4+`)!9jH1rX!PS<0v_J6>!))Mya$2EAY zts?~2OeZ=Do2ktCgV*9`3c}eLO2F-Un3v;x9|QHi!8oh^5ZSO2eS2QUtFE@tu!nR$s{bsj$S(= zMRl1HO16FQiYed6hppp@>Cc`x_0Vd1CB_Maawk|jL<@TE(#OB1YRa`&Bwsg3F{-F7u#k~J+71~mFiVs+jBQ9(? 
zPKz84lRsTsY0-Lr;_DegLx0zkFN%hcTCRmP>*7VPP#3tHZUNr4nq=BW0#BqLlm^HSYFMex^&&D{CZL>`66R(__bejmu?UdC{}u9hBu zw1eKrY@%^dt;F|Re|pIFo;>U5oJS)JFnnA;YS$*kiyp>Md)0ydOl7oQ;)WiMu2?f9 zDX+ggoqTB6De^W>1=`+eV|dD3!k1f-(eo`RcRI=Q&;kypx_T2%t~E3W88p#q0KIg{ z5i@>UU}wrfK2`M#DIQ0~)o1f5cO=QvEwX@S7M!Pr7)RPm&1u;lKN@sk8F9^SB$q7n zsl>~eX8D{FgQz>Tz43|sIJ%SmTz8myznCXZHBdt1(=PPsU`BUeenF_`K&YxvBq!H4 zlbX$U>A-Gv?Ayhm>fAK(@^^Re4w)ndhdW`heKg;i-5;kqyrW#f?A$P;bG&P|NZfCZ zm*>|f@%g)_^BLdE>Bd`^NYG!SNNJ1#7C$ZFN4z>mIR~TM{%f6}ylWSkY}HD853z)Z zmAdrUn-r>-*$X2qUCG{udSI|zjpm=#_Np2>l?G-$r2Kg+sygZk9Xnp0>!|7rh2D9z z{kAII*+UOTSPddFpWk`=HTyt$#t9m{_5(fK+YSpQ&uNVQV$pi*XsRX8=k&ND1h`G`(lIn7m?2ogBu)htdevS(mcq@@sH%s(>H&YB4?m)bwa`QYEE79)3Pid!P zfw=5^wdfMgL5z2{D1BH>CB?(3{ftbKYq^iUStdoLjhgtS-UXg4JwdYC1$xh*CrD1q zfBU_DBg8h{S2nU>RO1)N*MlKRXDY& zxA3=$MDV(1ENnHXgsmqoqP0AqwsCw5R6n(2mcym&=@@+>BD|D%Tp9`TSq}KE;5l|o zyaW%dyYR`644kzn9PP4eNtwC|5#O|9Pob}n25o4VSqQNK7VNo>8S_0qonBSkh$BK5 zLW$8#=o)+hZanJG;!C3;Uw;&O7k?FR?EHd<7PLbA$h$Q3wF&E?pd85PZUU(HwP$IQd*hu0H^A@AWqCI2 z3#@7`gUdQ~czuUEJFI;XRilb&EDMH~QYDOSHAA1U7trwPCuTWy(uk9G__NK3siYVP zf133Y;>Pu3Ut6pNQ%z+ctInX-&;wZ4RE@Pi2ZEz&9hydZ!j>c(!Qe2CwUEUX4ArUyc>#EeOuS5uwKL&?Q!8iMV{6v(-Ah1{MyP)PmU1+kSb`0IHTw$gjR zojdNuMR|kTv`2U}&`5aZI08~aqTs}1Ey3qaJ;*cEQ0CSfUr*B(Ms$0DirHOUjru~b zUxtF}a1U7aWe{%VTQPXrOtM`l#NXr3V0Xm=dFJ#ZUYBUG!vj9;Nb+^%L zu(=Sj$seXS+(G))REWK-1)r7}Gh%rMceUCGGl%|!0jpGmil(91cDxVsTy`EF7Dk}q z#Tp2>^_U!cqsYph?#Br|55ucYOM$x(NULZko=I^QLKCmRqMOsvE-YI<-!2YxMh!*l z*>>`|a-m|t^K|;8yBV8REAZL2foSyiD~#CLNXrk_L)1+R`0nm51Wx?{Z;h;lf|oi1 zH)O1r;mvxasus+m$((WRtMexLnX(!7=7M3^DLCk7H-0}kY1t%n=lx61~1#giECHUu`tBcu$uN&UAYIsEcFE|B}TrMhwr@xdS} zRJ*gpyw&(@cocM5pNAOp3nZ!V9RBcj6*f)Vi?x3mFmaOuJ{qMfWWD!-oSZ+vapqr8 zRrC-we>w`TkD}q*RRzJQrH^1)nusx9CjS5PmlglaU%J!(8DA;{P0~5bgG8mJQZ6CN z%i?8E(77^MEa$4o?8%ns6Iw%7aGEgfz6$8MetiZtYkDksx2cJ=oOTf1TL%%( zCKogwaz;!aWsZ(I&2+T|qhZC9sNE?Q(z!cZ^t3a>y4#E-Xj(!{z%_oj(JgANc9^WW z`kHDxXH#Fxb^OLfx){Cj8T}OMgbGu?5=|RJh?u#bPd{Y<5eMAyWNjMZqhe{+NOSb` 
zZ6UJ_ZP5Gmy1aF$0Uwg2NJA9hfr0`^L*zN;_D^}io1NvHYQ5Mz<_kU9e2gSA1i@$d#~D)Hi%69h@t{Lwn?N6kUFcAz>eB>iX~G&XZRp zUC9wP^g2WTx>rS4>NL`sk?F+aw>@dz_k&d5&L+3MSz?mYBQfn^IW<-HCink-LVp;W zV((2-n6j=13?6@<#AYod`L!N-(G#B#h0ABDds$za8g+&mtu}{S{-)Rv8zt(be54!v z`a<~)RV;2@NSrRdCf7A<$)P#!I4{TqmQ`Dj?98Vmv+5?TpL&57Keonak&B3}Z#{Xa zpGLWDCb?U$C6G;Vujp|fYtYc#NA*{lIumkc zr6qU=^~Kq0T4a5bGnTbfifa9)S>Up}NLD?9Hsb=w5m{%Z=A z%BG9GkLg43rLkg+&QJdOjFZHB=35#+Xmj@cK*<}Ger|SeCRQq9H^)iz3rCZGZ`-YeCs#l^@Og_z8Xv_y^b&Hag?|H-b z=;O*rTj1tu5s#6ZsPu~~!5}GiCL9#)dR^c>yMB;mvEh92pM5a*0wacEA3SCyqe;m# zXx6WBq@nKDKl(@iPyX_s@#R19m)Y6>j4u(fXQ(7?4rZ+$L!{%MVAu327<)SqJw|d^ zUFyaZoG+knk3Q_Fl9ABI+Eq|!?MAjG6%8%488=SAQLj~5fX6N3UZ{doOnM7Ro_hG# ze0QepYa)bCUxh1Ac45=3ji9vuPiXyIBcFFa8P*+8WBldCFy{6%tdjd1t_!}Rr#$yY zE*i6lB{y&@OM-j%`m^H)rNT0EeKy#=2;`Y2h$pYeR{zKFtK<7YC>BGE;;<5#Tpzo^bM=ul zajymRYml;%7lUAQ%qgk>NsyGRh|6yDVh=Bv3#YSpqTlY5a5vE?*plib6vca^8m3$aMyV*_H79%VkX4XM}1y`UxRk?wI8Mg_p?k zNr2-;a=auCZ37(GY@;Kvb^ck{us0f4_fE!dbIjPakxoL*vIx){^&TwzHHERK7vj8Q zsc13TiJj2P#|`l^yfZux52YuAsizvA?|6cq!z-X|rIB#{^G;Z_+nSXQOas00lMpvY zo|Cz6L$?Rouw&^AUX=9^ZoSnNnm^~ljCcjXQ_+nj?CmXhUse|L!z+1J?@m$&%52T! 
zBG6oHDy*`Nf`-yl3}P#)7@xN22Pd&X0N(gZZvG zF!Rr4B>k;euf6^8=*9yOyniR`IT?>7uJfVt=29%acNOMmy@r>(8lMvpfITOjpkjzM zyXm?)TTrjcMtWM{&Mb8{&#Dp)eq^F!i6Z}Z|Ng(FzpVS`{)Nl==lZhoX)sA1YDj!b zoQUXtf5v{?0nR`?oyZ3e;wkfkYL7C2Q9XGiZs;_UwSEIVc-0+6rfzh~+;hZ3qKsvZ z-?%B0-VqJ=G0dUFQ#7sbLDHd?OLthil3?@MG_-##$@-0)Q&J~=uznTwvF!_EqV6y| zv>mAZ?T7SThTz?P(3#)hZ%4BRtl*elL)1}Jz^us+xzCerAWO>tr*9TK_M1G&!hY_k zcqx&5h@kYhg0SK$1M8EcnCt1EKwC!#lnNN<8$p!ZCr&a_m ziD!uMk$n0(>H|GsluuVZ+(cu7|Kh&Yy9wTbv&3MpE}6H|4t`AEMo>@L_*qJA4zpnEaE9nDyM5Lk8eTACX}Ev1Qt8>y(qG%r3!la?L#XB>Yoq7&!TllYTm+>NAosyO%| zow4>4310SvtA6*37?m6#kMCKcwD%#W*HKCL8M)*0jMG%)lJ;*@6blf)WfLrXXsfuc?^7K zM7q9~a*`@JdPdlJt-9k!f<-U69j;TT#6X|Bx#KrYw~P^31vT9HjWV$ImOE7bWkB>U z=aaVSMP!)Tb5gZQ1gZ;Sqk}#sg>5wS zv4jNLe_^&=kiqY~D=MVyp<8yjW3cHyMhE*jw6ZDR7nNUv~40)x1 zLD$T2;;YAGZs9!g#>Wx6`e$c zVa1X8uY?TIh%sdH1pWWqPyef~{7dKkKlU$wuP=Y=UrY-A*b~J&Q9-!U)wDB&go6ywqD5QC0Oxc9~}8{?KZrd<3t$tJsEb? z0^YXjM!nb`Xz$%chs89Z#?)FeSxc5(@3$A!tXeQ`umV3OSrKmyvtffr*W&E61z_$k z&$=D2qh%h;>EsJ$Y{^Sces)r2&Niu_}D@6!J)I)3sU)(z`tB&q+ilJ^?X5;h*O7v;j0$f~C786U6)ukH&+a%l zA43au_z?Xbx-eCpopQGtw@($bA3C+z8Lx67D?NZdNO0vx&URq^PfR6S8tr&aP8%Do zZvcC>0JlcivipPX;PVtUHZ|B3eRt_%e#ko*q;wsZFI4CMtnp;?4<5l8W4nRV)x#$B zJLvZN6%Ei01sWwVqviyXOmAQGE!W|9Zxt9kX^oiNp#}C{;qGOWR&4=w z6~b)1FO07DorQ4=ig3sVQ(iuN54KNwgXxkg(%`n4E*sf^8xOidpDWJ1&h7yCIMa*I z(ru$-BKz&U{5^6Ef;|+gkb}_9#6Krvq@XNg701*AkQCRP}w&y2yUdcyAI>f z@qO9oerwU-xifF+lL9VEcVWT;PuBOW9}!=u$8o`?yy*AS0=4E!&=4D9ht(9+X*&j< z&PMFe=lK{tXb);U8GwT~`N3!>ZPtJNW877m0eyG(=ZBthW{*8{V)Gl-`8T7YaN_fC z|5;!DRagF1XGFs5fAiP5{h#?sJNSRj2Zb9xBqH*WYkv2Q$-P=lay5#H%gIRM*n6J7 z$S)-R^HS(W=f9|*Q8@`;_JK$SW^)p;Zb96Z^`vCnMk?+1+DBtG$8=$TV(?3y9DR9& zM%%Pe`&Cv%+PR*p#m3RaTGm9A@SRg#_mDUmToqUrZ@4u#zLDg{W}k)^ww$C^cdQKwJFgsHzx_P^{VzGJ_WF)EX&2(=4TnS3IW zCV3~W^CDO4nn>JKHxl_)p4{AYg=C30P>FpMjZd|slF^$;cf(aGnz)>d3#p=R+QxK5 zo0R&?8xhfu1Y+R0hl&~paKT%K3wwhA;+uGy__+)cdfiwmDVRVLj;9b`FKL0ebsJ}2 zaEO%6{EM;t+)e|RiMiklUrCpi1vQweOS*3f%$+T_Iq~_+1t$c*v-r(WPI~%Vfp7ox 
zf-Lfu3b|#{X_7~ipZn9G3qAB|;Q^{qVaj-A`jNQ?H8hT$Ne^#spxw>iNuA6zBARlN zQL{}X=^wI)m#;r{oPL3{s|kI==w#Y3b2kZeT}|^hKP5er<4M5YNV>yb$Ri!)s5P5J z9;y5!b2lUPn$d@L&HY2iH2)&f*u(-8=M3WA?-2D{P(zm%T_G<5GU?%r1S&Fj_a0%H zNPUfN`%D|8Km+E7(D+R&NJr0pTD_;4s;M2Jk*9M=jr#>Atn4j~9GAjnCWaFKL36k% z%Otcx*b$|_Z6&JeK1BSsg0`2MGs%D5WM<2~A`jAk3wxb(rrp|>Ht+0a9^8r(>PI2t z6+E0S?sO&9^W$myfimvJg~zn+*)V$WiiBo1oTHKJy#$ZV4r+V9hz?6TN`hkIY2fv} z%sG<`B0k#2%@}xxuG)D>@Hq#N+z40lDB%e)`I18IO}}yRuc8^3^e;q1_ZaaVa-N2O z52F!oMHyogDt%{8t$)kWDb_!z^rDdY^G+er8xo21%F+Vmz#~+C=y5WC%6F0+Y{vu+ za;GtQz9c-_QecJEQAMY23K##-^-9O-Vw(usmVARVd1`nXg)fU{gBL~Q?#0=_n zp@XQ-zen^uQ;2GZA&I`PP1An{5Vw6Z8DI7JOm4w4L05ZE0vE(Ebx&;wu5YFWPkU*@ z@^{3HUZNYfN=Vb%GE#RXnW(HhM*Naike=n;h}PQE zx}04kcgZ)V4#&}u*wf6TCUp{b)0Wn}yhH;dx|qeMcTpGDLYg{jG4V|~$kn_&K;^g2 zrCG%WbnuG|PSoU?CzUSnk$md)jx4&#WgV5L!pE<`N4m>|e@v$h1x=jz!dOz2@sUo1 zJ0vxH5!sQeOvUf+)8_fj%!AXvh;Mrp3ICHy9xaNYYsBR=a8xF9u<#_Q8(m4cQdyeZ z^_-TVHk~1pOP4xRI(KgcDXV%;?d3ZKMqn$aJg0=2_A8!b#Ke=E20~q2F1&ZXxB93} z`AwOYMly{XOZ?V!lFfHs)A)*~T=R){%*_L>!d2*nHR9nXo`A`voyf3tQT_Pj- z^nx=8NZ^t^4;M(oJ{EKcd8V6lR+6xhMWmynAMG0Ti-^5n`t(UM8^OwGFUWXu9MjI8>EG=ge3?it+{VFF3d&_iIUL}$= zo(T;Npo&JvXlm+g2X(#T} zQHjTi_sP#xEht{d&W92IA~~)u;5$ik$)f3Bx~NlTHZ{0*n|r#hmL3)6>Ke6b;=i(i zK1ip8yL*Tl3^k%-N`6q2XG=(l_z`EDpF}FnwS}y0E|q9sC4mzHxVmpo1=g0T;B)K4 zh*!1JxTE$|EH{}f)?7%vepnK5`(Dyie2N-fiTcm|vG0F06vFFYea3%$|L^q||2@Fe z-TUYMB1CmOe2O@X0rwk8OsE9&SN34!3NZ${%%Vb1OGdvrfDK7WIOFyeNS}HLwD!D# zdv&(F)zXvr|dTjTyOEB-EEx&p3CDb@(hvqH9ym0e1 zDs{zUk<9{3HcVv##4|y0%3WyF$wrIAs%+wHM_3?up|9P!iL0c2cx#`*I81vr%1qdc zvPtT^TMv*bUn@4?Pz!xM+KjgpOF*%@9@+=ZrU}Wqyw3E=7%%X?G?w^dQ-&Fc1N&pn z&RV!W*@f*n*^XWJdr9BdZftnK0eCsdnQbUKz>INvOuhZ1G5J;`9WqLrtzZi9*XRON zI%~{2?H>=hWBc$qeO_Zjt1hSvOUF9dKlnBM5sJ7&J}T>;!O6u2y!#$U)><+Q=YL56 z>C0IpK5iN$r^pgD%z@pOPq0DoL0=GCu_nPI(5G0PEt@ri?D03^&2nO)sP8`X)0gAp zGuz4Knnv{3wIsd|f0Ae)K|LPl1k5}ZlpZ;-yTWV0{kbEnq@~PyQY-%18)vq9(|gz> z{{uxQGBtO*1}VR4NJCsl@LH2axYG88td9$mFrc{H{|Cpt@-RNIzV|s9Bb5Wm6Qq 
zJlBt9+*r&o>c$a$1W)Ptdj*GVoY@C)a=g^}i7;O)^U6(f@btzXkO(wcm_%SuR&T@>jFbJ2%-v_`$LPXNflYkq zpdK5NlSK<2yRZZ2+4Cnps`6&vqS0WE73})l3H{%`7Iy9daBkjj%)Go3!xgPDCQ_C+ zNIyy1SI?w_n;iM%NH1Qt>oH)A4KJDhiq6J^Fm1*KydI>+&rMna!9Bmps)KQWPQM{% zD1`~RE^KnxW$e-%0atys`RuG5wEW@8KS?|aVd;Nh7N^3uJnaRoXVs7(cNLT>`m&O| zO@g;{AgJ8C0^*i0Tx0bm@Hm?d0~~C5qeZH`bc-G@i4s1K_S+)y2*bgL$Km0ve^8BH zhPLIoQ26Bov@a52$kXd|T3!c!h)cjzzhzlXVBk&nFYpxaVTmBZGt**z#ZT%r}p(S>!HevGeyX1G88ymX+0;>61@?n)b zu+8QQ#0?vQryj`gt7`>a`>+&~Lp`YJ`TH0d`IYqWeV|_Y8$?@V@$d{U{#|H1BoEue z91NzA+c%30Tl^eSdp;u=^5Aqw1#>;lkavr7LBeeSDYu_)J=UL(QZwbJDqHjSM&{t= zc~Y2Y7>!cxr^ z!2Y>@-kMbiR$$*;Jc;+aZ1~#wj%>z*)8JkI6u7(nAj#ttCcQWUog1rh(W6)t)oU{` ze`GQ1P88iC?|^qFJ;Gn}?D!X-m!rMA2mX9v!`qw|_(T#74BVY6%zK_FE=yx(79|`c9X}L5J+fmLI%>0pV>jZR zCw=*gUo7}O@=EOcc2!=r%L{z2%d*^aFSI`J2{Y#n!q6c4nEFa6_X2FOfD?s(C3x7BA817o2%&V!FV^_>*R5$HJ|Kvq@ z_f@3e*Len$zNxe4mM#?D-x~{DRRu3`Vk`u_SqrD4Jb0t`@sKxNhLt^f6WVUxf%F1h zyt#A~MhNeF*?Alc&C16y-zMYu@$$TXe{+=Y?GJiCZ^D>~dQd22i(mgK$DVQaIQxPX zZ(Vl+`%Uy@?{2VQMPHWrbcN3b-BoJ5U8OspFuoF`bz49-;jyrv+6*NR!$H!B1#&}; z__T#Zu=0opzfJxJWbG~>=8;ycwBA(Csz1U9=pzk;ObVei* z-TIavv8I)d%qpOT`(IJB^a@fw%1zJ{&6uMXgQ)D217z`FE7ExWF>w+6mbpR3^xU!+ z#B`B9N*`@0*tX^uDIH(|EtMN+Yo`?{-)jZ?j=ABx_TQxQay@zHltz5BvN@f!^K|3S zjl^KxBbu%>iHu&N2!Rmmld~h0h_bcmzDG%vb5N%FL0LpHe>wRaRz)3`d=fZ=dk832 z5d{lt3_mr5=9&zkUm0IdgHe6pCx)jEQ~W=B$eA%+;e zOrYDY9VO=k4|~Ul>om)M16et%huRA)nWFytX^3Sk9r17-DUt{o)rfao@uVj59^cW# zNPA53Y#_Q!8U3`$9b8-v2u!HQ%!=C0w4vRI89u*1R)44_dLyq<71IvVT)LX}5o-V9 zjadRW_z_b)@G$u!c88J6jbPt3YbbLaPA@P?)OYn)X2M@aklZ+r$v9(;VlRF2X}cA4 z4_QaI&Amdzu3xzs-_MW-s;#7JwJmAwH;KfQ+~Eex`BLlSLrKQ*mGqgw5I>V_juM?e zT#5QPk|aFir&ee|h_L?{cftlM3N>Jokqzt!ilVnanSi))Ux7qfi)o9nMd^x6##UNS zU(~LlHMcZr*3WY8L2C+`uiZl$o^Ix*rSB&5t6!49b=sW!7{NF3B-p!L{t-3Smjhc{ z2~koqgD!qDIX*%OG%FM!O#Qbo-^Lbnsa&D0X|ZI?T5W2p6GszHpCIx>&FK33X(U$Z z0BzCLLEp;BBv8GS^B>gCwKlkryfyNW>-U>J>vn*W!((Z>ys5x@T2CggJw~k?4KQYv zB3%&a01DGJU{Hz$7|ZBk%2Erg^wTFFWv^1lyP+P`~IU`Rm>t>YE+N>=&2&u$HQ~vhqfvl?(2w~E}KH-d}Cab 
z_MV)$b(R`hD?qd82y)#ACEBWNq?&1*8yDtv!GR^&4>T>5<}v~ zVra`L8h%s{tIbPbwyO%?Jfj3TTN~Uu!iF6ed=T1OHq!?mUc)*2M{sL)9VVD|K+R); z*;*i>>YIg|2^|7gQEm?ttYpcP%?X0P{vK*}^kJXazJe2WnNaUg2%)X*q;JzPJbL3Z zyfrswJ$?K01DCu9Cp~+9pzRBQO*i0%sRG+x?1FT&8NaMfiDl-Ppv&+aZ0XsI8bjpx zjAsp?nJn~dmZ~7%HvkN(yrJ~04u4edF1%l_z*|agqI<3z>l|B)`!_|v%}d2Newsbo zyzLQ5f9A^fX%?}&ZCzMjY^C7Y2w`_mVjyO4Wg}{6)|Cu;VZJBdaCjUQ-QCUI`qm)i za<7vUI=|rMmVLObJ`}?*8}mi|^Dtr1FA#0D=6{rUv5{7zVU6Y!82ZqQy_wO2b;~E= ziPBe~;k}bg9@dZTw@!xlh&hJuuBj-3d-)&oO<1+&F0@zD;~(Y;`Q`Oj zh)&g1ur<8~_R~}`c=8Su7px%;tF75-o2_B$Kt+}P5zD}DZT{7&Kg~sf`#o0n8TVTc5sbgKG3v{MDg)B!y z^1al7&z^J@+wHpu(_sN0Rtq`qx1&LQv@%<@*_!w7REH;tHK;c-595YwV$)6;3@ugS zCBwh@w0BgH-EB5}euO1&IdT>*6#lL=o;`pX^=sta#a8GqGG&8Du&9>S1{sadA9)HReLLmywJOdr`bT2EHxP zVF%it7d)_G_`uqj51QB_{6Ejp%DNxhaLJe&t!%-~TReH|TOK$s)tq%&W{SnLPk~xi zF24S97Y_)Tp)u3<;1HFQC~a^h_n%!sT4}^H_XKWKKv{m4I#2KKaOG83u7_((U;gHF zHHbO4nY2&pA$l_Xc)#5in7sHePBe3=&!fT-pqr)757Ie-moErAB&z~2+HS?$Z@0j6 zm35FMCkr3c1paG~1G(E!i1BuhvEfT4342%$KZ>*P_&8g(d5bN4yKo#Qwq}C+ZB0JA zJ{!1=lVJMD+eqH%v-&60SkEvw_L`+Lzig@nJ4NmaB(s;8Zhfk{e3u#yuJG=xo3$YgkT^iU+kN=tMw`bf@eKzD!c)&mh->!;GG2$(U!lu4vD+X(!5`BWeH_VX1^nySZ}96kc8@hr;6&434cHF%AOGI$j9c*nmU z!{g3={NWJ^FmcOmQ29}cQ^HT9yxeBUa%;ghl|vwjraob_??axPA=^@V144H6l9I!D z`1JcFs0tIa{Vv(EZ@#*+eTu_TH2)TwC6){CMFlq5s)^B9{|qg&B7nVp6m69s!oMl0tXp72pyvMWi zU?$#zF;Df0_~T|_Zn^=5^74G^mWfzW+eE*qxUszsvi$c_MRv+k5nt?wIPmHj*!o17 zkGFTi&FhlDxyPMd)07L5ONCzg@o}&W-Hf6IhYG~bcD(4_ab_JRpcT6E^3wnVXUwKU z&#Uqy-m2ghHxqWGZ!s#DEkUtnI9)tJozE^5*tPxE(XNOxvgoM=A37(8sM!3(6E4YM zZ#N%Icu(}4Z^wqWS@4BPJkC|sh2D$jP%SbVEc^Tg9sPX3;v-Z$G z5*gM{^FGcV&<6XJ?D%5x1RpHW;NNRe3iH3@nA6SdE&*+-*p^j3_S~u@_$hMgDt;xr~!K= zPT);A)zVFSo}y%V4fA z2Mha6zmcY}WuXJx>^%;ef6RfVg`*(hQX=?UEB>cG`mesi_rLm#|M>Dh=%fE1zR7>z zuj#4#=lP{8G?nRE5l=5vIfAE*Hiiy8%K883L!{QSjQ{*FBDdQZ41OIZWf#73T=6za zQ=GtF_c}MQK8f5|=+FM<8vSTMWr1?dURpd%}-K;`It5;JfNryuMB>6UvK(dGF*ei^nT zrmd55ea*1vT@W!_(L)B5$P1j0O41x8ru@47RDAFd_kOf2?2!FR8GR8AGLZX zI+-NDeaEL=tk`m8lxk7bK!opWF$vi9y0|M)1#5w{8_$JJ$oG@rt;5 
z)DtTGW?3-U(t<9Yx1Id8FP=_ucuqZsmr+j}7eQmIW%f?jfk|UcKuu>MjhF9a_;&{I zz0nQ-$o-~Mf~ttiu)ZjfJ?wLOt0Kq`K1Ad5W4O(|v2IuGCxQKA5^ZU65k^v@YGo@t)`H^ z`~8_twonA@ql8b^6_T`-!Y!7TaMJd?f{0!QktsztXx1(}y!+hcPd zd$MSm97QG=%R{zX4e?#0T96%foMbOtNynHUCjL7z8RcPj7-^LzC%G5QM7=bD!@~0r z*n5wPhRkIwZ+8$;iTaSGH)m3fVFSr1vr7bfJILr58>kdB)Ml#%#6P>vDc?xrQV(m> z5ejysEU$!~_A*BUl_f+i;sVieT}~$rc7?&gW@O$Tdx$@BiF@1U1U2X&G;f$INNutU zCJxudu+X=(q%?vHm|}(<1N!2^Cw(!uDS(#EKStgSen~_VFXc;qJoX-WSOc!*UZMe> zrnKyhA1xX`iSF*QgQBf{Xmm>{-80w#Bb_^FZeta<^1cHOOFT;adoNP6{F`*_I$2@H z{+TaXVn$w8C_&+@AH;N!J^1#N`z%iIqJeY5n7+Ftv{1Q=m@hg*b-r5R8Ic_5NhDO` zpffdbb|upc-01Y)YX z8b{xe!pL8k`dbek^iIQgaX67yXLFN_OMCXYHt{J}scNGc@b^m(YzVZ#4WGw{I z=}iUi*4zY{SJv$0iHf}3ep|L-Tm%@+uY}N45ifh;Ehw7&2B}p5Gd;_ccQQ6XCrJ=2 zEbq(vCTVcG<8Gt^*wc+_ zrS%{_zY*Hxn?XEbIN7f6#fCen%j!6KVa#9K;mi3Cs3Q2PRgzM`@u@z4^t+%N7vHB*!i|k- zLv;C%IU+W3&n}d#G=X;)A7bZaXMTjf7|!okV8uU%lZ$6EaC1%)_!Z=(AyCT>L`!mL=Uxw9O>_=+-AjjbqX?)UoeD#R*)MJ59Z1*{ zicNDCLh-5=5E%_(*390G0YwP}Bo|>yZUiV_oCtZ>1ZMuKZCs$pgnuCXo|}U@sp|Gu zxX;p*uTL&Pcz*}PpHfIj>_Hk8Xu*z4UX0^@M8JwqX8hbvLmXYD!)o-`=Hn~V$dELF zaiAt4U0VZi^)d}!LFqIOENz5W*UNC*!d9FWE&QyHJ3z7~wqWaxTiE=gmcIM;6gF#g z!J6IQaP3uFKIWnvq{VpgLCd=#=WH+3INT$N{zFmP@sgHI^a0D=(=n!{mjs4d;Sk=1 z{kQz@ZZ7}7{3Pz}Ki8MI_d3+z&tzgWripw@wZo)MS4s4x7P=~ECz%xMjO%@ZXm@ry zO>s$~XErE7?ACL1z1uaSxJ(9JYx1f8=YHIY&KD%6cmvrzp&tbPoJ^%VQwyrYZBa|B zo!mHhpAN6lhJy{-C~PFj_GR`k%CwyvUTc8jO2I?D;|1O8+7ITh<%z7|UkZ&D(?fDP zAXU{Y$gxw!q|w3jezGGPZs1 zktfkfWc+JYSf^%ynUNz&dxJc;RN+2N*w9OTzwG4JCKl82L+;VMQ=h5nK{fOk8c%Fc z8=5Z3Q|AzYuMlTMK{1DPj5MS_6`JXmK5;bA<&@9ZNNdPcu%`a!#&M$TYk8|SEFl&X zFVIJ`T=BZ~KB~Cy1X-cpP5lftsQ2vmr00$z4N!l{To!IlCMwxt*x3U5^p-yQw$(7w z1fClB-)27DY@|W$9=J9zmX5YnhgDma6Ul3TpFi`g;NoC2a5c5So4g{P2vfm;>vw4G zx-V1|IF$M`f}18D(-ZOv z%TIU@v$I3*3Rw(M{YA6JWOHYt&QQ_zS@{+kV$6{FKvvK14<428g+53Y?)56exhx9| zne>%TTWCYlFSKxOjzS(^t$`eLI!-6c8iPlwGp3jq(d3Vd7|HSzK3Ouem_@e?Va!KY zobmh?`RS?$SIa4B_>@*qXkd)3yFBRr39m@|HYK#WZV6Stm0`KS(3D0PV{h3es#o)Y zSd|&Tm@L5uHOGyPu2F&VwWn#iaFc3xts$H{o=Ib9Bje_7hPiX{xUnrtV8)da!>kL` 
znpDv`8F!N^WL+YskwE5wFj%!ks2zi0k*g@wTO ze^y0X%gpFfy{{z7<_Vpco=n2c+G+3YCqz^pM+!8{DdS>*8a)nNd29u32vxwB2Q0B^ zd6b~j_rsPyM@Y@(A@rz55p|DqhBf`(68oGBoZLe%f$4XTF}N!y{O$T-d*?6imyaBb zipnA7VQEy=Lm$75@1$$UMbhLT+|imio7?4do*XvsCH(|VJge$28ePDUl2aGx+(n&K z`*0Z5cu+-8&$7l9R$l0}rHyWk^CXeR_1u%WrZ~=3LM11&nY8&;bjFA;B<89<_u1(d z6&XDDu~hp;vhL2NjSpIB?UVv)vhe_Q*?Efuh8oB%B`UPBMJlFPJt4) z_w-3zC3wgD#^A+$p}``KR)t!#qu(pBv$+y@*HsKlI(zo=b3<0GEC4s0lw}Lg6bc!E zWHNe>86Q(f$gn@3K|1vz^OdZ{g^Lt zV!4Ll5S<+jhnpH9{<8>tH+w)@c@v1ZTyFP0S9avJqo@>IhjXquup!GmNzrV7_&Tl& zGrZd%rdr_s-__+c-k9=g!EeF0?^Z5ZxWk2BF_5hEo4d7L@DD6n#XKEb2NUBs=$@Mc zAAjnyAHw(HIrXdXqu*=1C3a#zD;Tk5?{-6zo(M!c4{%RDeMW}_FFr)b@z^(P0clwV zF-WPPv+PaSzz!uWSrtvS!mfcRdG`><1`hl`t;Xd}-*9$K5)QP!hfbizhe8qY9&sIV zwzWdGUpv~IF2e+Q4gPnQGh3nI$@&{JI7;CLvvR@a33((pXfLei3Zc8< zEzFZxvPuIr`QZ3iz@`saA(dx+1Lnf)lAY*1^anDE8Tg}o50;y~M_-c7l_gtY^|2y2 z)L+<1XYM8rj|5&==)MBS;3{m0m_u==4PP?%7B$$HiPehN;nNc#vl^SqSe2{s22l<; zqwXn4H%C%QOP$ZflVUzbcPMmdKEu$5cZuORMRsw!BJN$P$B!CS13S6~peS;rxANlO zG~ieuG`l^yvm0Hkk~jf z)q>8HUZu$AsCPS{gU{=hUXwkB6Iu95v$^sy&q1AQ^3j8sfD!sMC*WatV{ob%HeWIHPbT_TA( z=xL$#dt;&)^p+%d>}1@Wr&1>`;r8lD8ZTmAh?pOhO_%*JI3?Qm#GGH&U zE^$T4w0iGto#%*uLL%+0+(hlR*`Yj z3oIoQRqv7Y>$PyrlnU}rnnt_)`cR9hW+)Dw!)Ur5p{IB1!_}jIXy7*|Zk=#HCivE8 zZbG{{_+N3Q%I-&*`Z8lQNzg(~M-zzF52K-y1g6vGIjKH!lLUIaV9wadz~IMrL@uf? 
zG6BwX<#P+5iGA>a=^s)#|10f0(+6*#GK9JPI_Q*}sbq$(80{q~RFrYVr`cOfave@E zqS3N+zIGXL9VLh1>$Z|qT~m_QSW0Y0dV%5V{xCSvjEc{*Os{PzNecTx+M|XtD$;IZ zxmp87LAQqdS>8eoM%Oa_!=5lxYKzEk$9VeGQc4n!?I04H&)y$a$V1n|I7TC38F}{p z8SU&jMJyd%F!6jREnJTz|3odBCiWujii8Gd++*HYTf%|o2k5q-QW7~cjd^GDjZ9>F z$d(&t>8ID_)I3XIIJ_K77G}tTL@uJh{IUgH`mKnk7-CtVk@zWQ5f+`8X_j&+}y)kcAReKl(SPQ%e_oOqcC zgj0XH@Pn_^lHA32C^KLp-UdXsIk9j`BO7-$RY2B|Aw+t34|SRM3|o48F>;VOo_wLg zzR9*`d!+v8w?-L5y$*ocC?kHxrVmgtxIeG4$%^`(8$p^fuA;3&73k%^fu{pM!m)8j zF@5ee^6{=B8@PEAl>HWN4#nr?ON-N)48<(C#P(r-Yt&qMQm1L+H?FnPzj*&4_zmx9$HBh-)4Q@TZjnn-1!~F;`KT#$feHRX3 z%w`>jAyYZ@pV~`PLLK;kOK(VfTn7!w^u+2%zaW2Q1GHZ&qh}ip*o5|bu+2x44;}r5 zFi*E&{KW_;F;L-Gmp{SupXzu(&z#N8c}3L#n!VR3KGk-wd*#7+Kt2X?anmusqTqgR|+p``k_Ta8r zntV{oPYhl37n%sUxMpK_2y`Ehk4_2RLoJqxFCBxSM($YPpu>jP$xx49R(#Q-x!~@X zhAV$K^3zUA@rgz)9&a$?T^2n!U(+N#J-UI%4c1JJ@JwOzM|D z7V>gqX|TpuCRzQHPk=%sGd$=rH5fFG`j1y2_q!cos*wQ}CUHdTjv~Hry+eBY-lY3Q zv1Ie^Lh^g6IV6hSQnwaw#@1X0lUK|qR%v{g~)Yt;Eu?&e6yAsv(sYhyN#zdy)$_A3~*`w@BZ zA(?JVXrdiEjmg+2TF|U5-2Av$!qhyB;HsV&f&ImBk{lJn6xQhDyr0GxbNVwAm=NnD z_gM}672>!!D@rxqhjRE!O1jO0sf+X}HQj%ouFyJ4#*1`7wxNnF6?S*cCytXv{)Uk2 za#rA@-=}WhI7Z##5dEQ2PX{y_qa-?=dAjj4@n7K1?eHIofz)@FIp6njTuzBb z<-O-|`>YByo*>+2xcr;sta|~{jgH_Z_+Jla9D~rMainX*W12h5K;R_YhRz--#I7jA z&8~aV?CC=cysLs5?|Nv1^&Yx(SqfelCBtTZ`i?I4e-P)qf`Fzd`bhfFp6jw|mis~XZGToH+5zwF7km&ob#U{z365FNffF}3a z$o10hB9)O`jXFZD~RDuPHMA%-)6Uh}ksVSLwt%Ox`05qujA^!}QyYmK!_FMAVNB;#yFQshT>TWLX z6QM7oHxoN*zk$$V173AkkJPLe@!A?geh2@8wss}cGC9L`JZPpJi#>#Fg*{(CWdm-- zHPp{Q4DYKf`LDD_>_PbwR#zTGRiETzz|#?8>e549SBv?ob~l!>F61@_zr`lMYtM?Q z51Ve`%^qnxOO@proa)P~ig!H+LAKG0|7d0aw5c0lxY?BL7g&k+_M7mJrUegE&>oh& zma?0YT4^WBSmXN^eBs>_0>9FZ^+`2jLq6=Gb~cl={<)5-6iBD@s>nfoq3`9hjj(%} zk{(&(%$6ng&<8Cwp#4E$NzMyd#Fp>Kz1}O}S7bn^1EZj4T?CBDj*?@q7qiKYwrp^m z34L2EWfgZ~0amp@K<;wtI3j2=l9gn{?Ifs=dGXPwM)7)WFO-H@!-(!{&?Y8;w?>Ct zNE#Fl$w=jFC!KgAWetxBI*72VRK0Z;Uc}n)rsG3&nZE`5!M$aCUfoM-YDef1PRQ1d_JA#8w+>CJwcMV zLf}>;g0IM#T|4N=YErxD-m;glUik}U&c||vb+0jL@J9$v>L+7XP1t7D^_%m{uQJU4 
z)L`CZoV+o^EMTk{Lt)Es0V5I)!^nT5??>`dU{FsjrQQj%+_o=lsnUrLOsef6z20Rc zTX>&^z$(A>(G_HZ8q`sGO3YcCPpbO5@mkyo%&kCVE*~H{^?yRWjfKcuA9=6Ip^^A@ z-^ZSYO`QBM3o&iIFUaPNAh<0rXq#P=z~m|=D;u|A!O6d2nbsFB^f}XkyG7*V=6tfl zS&h*(TX1;H5qys~;ueWL+`X*_4rTr75>IE^l{H7I4r@^dx!hvODCQ(OlINCxCz6Nm zIBX>NC#FiweKn2->_hYzkkSJO%klC)4LSbD6F52iJrUI!D*}&5xr|z_m!cbUFgV<>a=5oSJPF(GAXW`l|<}KiWvb2h&N*sw>1>I)*om4X7%E z6}UW$z#*N%-JMGywrd$i-}b?j2{~0237Egauu&O%V>$MHKdHXBS-TveRdc- zcZ=zZ_f$BYrc^j;s)Zhtg+!L#fqRDCVOMz(riMSmv$6+xIlhbN4>gcO8K+2DhYgJ9 zTf!pqR-!DZ<5q^26Pe2tYG)SUkac&^f{A>xWl0VGDn(++%;$7(B z*(RZ*xdgX~CW4+{Rg?5sTTuDuDl7zsW9#NV5_9rCKKAP+N@G9Lztf2hWrkv*_e~BC z)f1~QU-I;{1%0y63>{=T^vXVuP&G#;!v-*}-wKmv=ZS}t3$^!?KyX4Vs;ipVa zd>|78?WTx+otV~u9o|&`OkDH6A@E6Hy%}IfeSF7RnMhiK%%g?#bJZ_Su k;hAKoe<>~eLU?A(Wz!+x5`PqIu->TTIK|5V)|rp(sGbSVN6gkBT_ z>|X$Y&$zuaU(7f2%RmAUGIz|3iJ1lS%2dogN$n!0*IW}qFLlQ!jzZh%D5_Vtc0ck0 t$HJ2W2O?-$-5>p* Date: Thu, 12 Sep 2024 16:27:21 -0700 Subject: [PATCH 46/84] Rm sample --- .../test/testdata/lora/sample_weights.npz | Bin 43766 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 onnxruntime/test/testdata/lora/sample_weights.npz diff --git a/onnxruntime/test/testdata/lora/sample_weights.npz b/onnxruntime/test/testdata/lora/sample_weights.npz deleted file mode 100644 index 06928df692172ae54b8d3f1a90020cbece276241..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 43766 zcmdSBc~njR|37+~lcaerr+Lt%GSz-PcjlSQnUg7V=FCSzNQ#hzB1u9*Qv3PbM@R^X z5|Jc?R7gT{&->o>UF&{7zkBcce((3*wZ7liTKkW)*FO6Ud%qs%F}+UUFcnop$^S8I zB$qGdJdmkMB>!yPB!-g3OJ~nnGIig{@Fip=AspI6#G|%&J3HQcD024SuuJG{`VOPk+OS^+H<;;L7ks1k6h~;va98^$RN>#EYn2r(S#|`^ zFTBNePu+_RZ!(zf8FkKYO)WdtQjW5eX)IuFg8!;C6KLu`mh&5i%%ioH!J8xOk<^W} zChO8jJ2wjWvQbcecaa6Y+QM>rUSM8Ub@)JCg_0w9Va5#|kZzISygNn^de;Jj9(BVF zpWP@q=9Pc6-(cJm;z>c-`-I`jR`h${VVwNKgCwI}ge!V_RJ6YvW)FVDcB51^M#TD)9`kZ8qD6H$6OW1Yq+UE_L+8^XSoHFulV3u zT`O3hT#9y|Ole|Z4fa~N9=~=^V$Gc`(J9RVPOMI0k7Hh=Y}?xWq(%b@Oq{@)7v09% 
zNgG(bogvdY{e~r-Q=*Rd-D%sc+ZaB70K20017{Z7!$TDf*m%y9OrGe2S9E{2ONhrD zbFpg_^MrR5FYnP)=3dnaSlW*KL5D-DzX zc47`&ig5lL7npk_4mSmU!($FLcz(rQrd*xHmPZ_9hl4RKvd8*{a_r!Kyx5D*@U$;~TRRz;*>vLq7ytQ(o~SrbYJ zYkZLZj5$W{VBcQ3K*WU4>{OH~1S-GB2|2-7;;13G1&(ID`ns}HhcdC{k;rDxwPI85 zoyOaDoM71#OXxA+3l37!f`D1;^KTu9!HP4tgatt{tmOPJVY=%BW*ivF=5&uo<9GGA z`BqojdO;uLV?X%^{yT`%-D=|W4>J9;{f8m0@Bg0=*UL@n4jHNUC+B{^4mq6J$eQN7zJSRgPstwBZtDwjYMh7X!@1+ z0M4dFljhcww0_cKvKaA>WM}`doMq2JzS0%E`qZ+RjQtQkH=ebwxR1G$Oks78>(J6B zhqwhLwAynkS(o0U;9FXx`>qYE?YH3Ec`0;ipc0??`8#I7$tY3oQLONLfA)0-=0XxZ%31E zS~K~b{RA3H&O9|20SSj->`XOYzS*BHZEz3|FX+VAYBkZq{3E+q&!_X^5ftb#w-(X=KrMA3d>ZR~8&!@sX;`uffj& z5j63*DnICRSAO-jX1Kc9mLJ}I5R5b|hj@o93iBOC(mjWTU2_UScd>!Eed|+*`(wq+ z`xXmf<9bt2X(Se%*#QIgsffvCN-TYZuGpNeD$W*vQ{LV6bjVG@7u8&$qRy`3f^J>J z%tmu?+LungDkjh`*+5*{ti~^MGvZs1_*3$@J=p6@Ez`6wA;-QZV#;kNUSbw2j0)($ zulMf4n@~4?i@q1XMt+QBM^^YKXzRn-9ZOl{iVJbTeg>-xnj+C+oZ+!A5ucr<_(njtCf;Dq^9T;woJLB z*nj#cm}eoQHG}Q=Lu*T@CMO-5FBj6bHeWs}CzU-fy#T&(D%9jR5lV)ShB}9>Z0Fz3 z{Q8TQd_b56bu_W!xhJ6jJ#(PfE_br2HR2_vJy@;N0m$F!AqE|MhXrR!A-=yE)V8jI z=C$GAb*mMH{+l7T+JPN!xkI1pt$5X)PU6U`KOt>$3;Yf>7d1aslSOxZe&BNpG5YKu z(zIr@M$iytLwa%OVF_8|t*|v}G`#p`Ao}(?M<(s2V&rlIke<9MIGb0oXD7GN z9(BdBfnC9LttW*StOXxkJAT+J8~)a&Vp?$M4~&q#Aj!#4Ayz?I*1dWR0qZJRonbc0 z4e!#whdB51wr~DJ5V!Du5R21X@d?cbrLnIrDcEzf89X0;n1uzjbK})4A@Qg^HND=) zSy<}9%OqEdo8<&6%`8~e-1{s@HUvX;zjDc)Kl=x%MREN?&Z240p%^$!jota>3S-Sp zsoCA44E(J|E!Tl{$ zrhxfz`I>iQgq-DHS-_x){>eYw@JEmX4cr&aq(<80+hG8Xu=JvjpU*P6Zkq7%6QS4d zi`=7QyKsU{B+Kk>g?i<#5PGmHTFw}PC1zcjr{iASHsvyUm#)UplpXA~o;JwrZsm`0 zG^bw1*<87IJ{#P58yC1{39EKIFHEhR#bmGCIK57WaDQJazR-+k9`{eO+gj$Z-0CmO zwX$bXThfF%AGE=x$qCYm_OPFWPvhF+$GFJ-E(SQ9Li5HWtYV}eQ`J+VjJw55B}5k% zg#2c)2|AShcmZzp>k6?OO$5VsNA|w50z-Xm(Ddj)T>g18CXe_lMBR&FA799s=V}!y z|0S@nJ%%Lj+rX_Jb_qug`^%Q3oyLZ=Eo`XSLpCg+8+3nSM9F3jDErbEyWjDo?8(*a z)G`--rNb5eVdT@<@ z;lnzX(|%e|?2$$;I5Ey&rBMe0)#8PCJ4D_0;R5}ZvtQPs=pipftL=Aif&M-=OkxPi z)n#n%x8GQI@Bo_?Xa>0+$FX4C9rXGpaGh=T;18`T>|SF*vgmZKbY&w8oY76#;NOKR zWHHFHwqvxrA}Y=;uGjMPKj(>n#p8&|Ze 
zai)uXtf8=ynDXmmPOI~8oG?n;uM ze13_=4zB+bH}V+sgn4ZeQKzJdWtxSeta^EV+pr!OFsX~+RxlD-kqP*%$z>}FG{J4F zJAThMr^kUtP~QHR{TP#hzpS4ynfF!ytX2cOdd?9%{=2id zklSZr!=8HTt869a-X2ZPXZpkCCJRxzK}W3FR6_A*%*8UF>m<8&Us&J2oVwmQO`|SK z_yCi&7{BZf<<{O|)|I=#xvHG>{-n{MC2pe9EHi#g*vUR;ohTfgh`ZmW12USf_SNx zCjPYKx1Dnk^_8}QuWmJ+yJ{~^uek&%&H8-B$D?c|_`{8><#c9_9)CSri*JhOfcCtspiR4##ZsM2lAgQ7MXkGx(^}2MyR*w+)461* z7~}(k^hAL+zxkkNX3y#=@c-tJ7+!V0Q4R!P~g~hObK@p zgU-BRP4Uebmgz&~#lInMVKp>eD8b`3rhH=4ZCK~8Cx#FD!no2klyG_n6q#y?iyu9p z^q-w+r*RiPH|I6Zomc~T4{k&KSsU?peMkOqy{fn%b_*PClZwuEOTl&QXWG%?$(xYGrW0XW{l909=YQ&dNG=2|THA$cidmqwBt#dFnS&4S}G=i5?3RVYA2C3aq40=|A zVUL_3fVM&D#2;{NNLTU5c?WSx#TK}FB9r{9oO$mBTWHfHJuxKZCxy?tLYARcVx2`d z2<|qFiVv>RUCInP7ib|=|Hu$ z=rQXld6g~3`bbaFuFirFc-Mf}I{5NBnY$q1!6_^q_z(^@T!S`$7pSp6PhPj%sHJie zxm3NQ!d3NfLMH)=jPBF7EKUARtR+83b0=L|+*R~{tId}c9iq2oR($RZGYC>pkS!y1 zL_=kB(bLSbj+J{oQWn{&J@++!~8U*o`8sf^~ek7Tw&3!pw z%QwxPOH)qVqQ1r2ZM%d)?G~Wv^iWV6 zpNW#fOM>j`tGp+Mi*b*!0c4pqVM16cr>nY-O4@pl8K_;`3lA=-Wu{JAloezn1Ufm>N^^H+_{@UoBwWWTqh^%; z*5o9w;<)tIv*H2c{=n7FwrA%vF0k$AOsFk(1wJ2K!XA$`r{s#Atj_`;n)1^flv|#lkHax^G1e#h z_ZCb&D{&c%RrGS+MI9CqNa3c2<7LfO7Zf7Ls>0>2nN~_%gT5R=l5W4LPgVpjWrK!chG>=HlTFtqCOpzw$b+b5gLb3v-ywWm}5b zf0d=bC>HbwHnL^5PVmd?3v=IMNI&{~LCLe#g6Xzy^calkYkV2&&a}z0`&MROfH@AD(@ZfF&Yp1Yg{jxWGjcHU$h{E&^* zzRi{|*QX_896x zvjI%%Kz9{v`pNXK*mAT3T|aCFQ#&=|n9Hebf{~P*Whp4h*y~^CCuO-V2RX?gWt?tM z!aN76P~@_;EY--0rPVxS4g-9_>~&Y@m1K?5lRVe%_>dio_`#ZX_u;hUznJY}U6O=c z?frNDM>HK&$p!X(z>TTA$^N(|V5zN~CGFq9WDbpf9~P)W>%DkRH*p?&`u-`lv>j%) zt{#+hvIPs46U#eT$;L^2S(7@$(2U#M8+%*W`D`by3wg+*CZ=)k9KNxUd>dPH={SCV z{Rq2c9mM#TL)gp=Wss@t$nSF22G0CZr^71o=(M#H{7tq26C+hJ)=k3~%|r03v4qMu zr7-nz5p3^5RS+Wju;&G*88p_isU|JhQW%cfQ#;Y?JV!|QSuGq)jmKLFH~xjiwG3Ps z{GW!n|L!bq^PUc3g}u4>PFevMd3FBg=ROd&?FbXPRgm-NPt0(ruNbqxvpDaZvgl{m z0O}v_6IW&r@`z8ga?~zJH~)e&Y>A7Y- znEcpHL2t)Ek=J{C5dQ@HV*gO+?2b@v6OU!#_WYpt8vJCT2;Q|8!d%sQ7@4jqo?5n- z##`v|=9R6mAov1!+Z2IqK}RvH@e3ZbDx#B}qv?3n6Z$w}H$;x=Bz|hq=YzInVoKOG 
znqKB3zLdN0s@rEln%NzA(Mbh<{c#a>55>cz115aUkvQ0XBNwfi~4@K(CdzYsM??ki5;uq=r~)xO)`!~rXl4%F2}UPAHem~XBgNcgxt>7Q~qZ! z{=nu=e8d7PQPRK*@-Js-_^~Kx>7EBQQ@=nKe}aA0kVD$%CJ1g(fyQOd{Mlz_yiQ3F ztvIC27aT1BujR>X&=zYkwt%s|e;XB6@ICG;TS_x$sf#*^u~6`(5+cW&l051wHe2_k zwjP7&0i@9V@%p?_-4CL(qamiY1`--25U|=C(jL`<7@KUwblV_hFU(1N3ZYL}oXM28wyK7{xdcaWUA z#&Scp!g8SiepSDPX);@0qlcatnh*z6`+*+IRr!FRsW7={Bl-3DL7aL9{dly69$CMq z0G2J3B->H>{+qC;YbRcwxtX~XD}1-`ZTYUDWmLUm3erXgv1sa5G~Jd-gg&D8m`9Y&1@%cy*3IBv;pV8+T`V>LSbKbup0_L za@AZ5QR_oK$Q)d`@?#>Um#K?6xk(VXsU2@*nDRX~{)J2Ru{5dChgS_2>D^m}2dGg6 z+q*=A&07avcK2fbmPs~zO@|2h_d>PznYp+BLlC$0e-Mj{e{YDUe@8R(!L{sLsuLYs zb)H3^`G|``HnLH1?zG%L1b<~WVTwlz9$ldUacfWDa<2DN7C1CVwZN^!G0A6u&dgD5<$+^&vS->R!uBssVPaH*iw%T z&vC0~Cz!reWXk+879K6dJt~GE*VfC=anhz^gF^96vH=8t(_kB{hOh+Pd8qsL0PEXu zjXiy44ynQ@cBWaMlC9rzjva5a2hqpau-DqKOvjWmqXw|1YE@yb+8s<>(T@M5qf3(t zaaix$c-64fefJbj4HZ^NuWC396>#YH4iYupK5iurb9didmQ#35@ipW zYW5g-VyGa=zHm*EZ8MvZr?7aECtbAPhU(i6vIU)g;czou^qc&iwcSw1pw2J3vuUc3 zq~S;r$MdoDstE-&RB-Y{5lsWHbDuBQU`UfUEe(#tL5`ha(VF>8_Bzo2?=(9&-OCz0 zt!(JBs7?nXv?%D}4a{ApU}3?1Fpzu1ai2$FjbSAVHS}U>H&3y&S)Vb)D4R)!U-Zkm zeTW;Wtw@-?^k~0vH{U+2~3sL+JxsJiRM; zKY7otY*U8Y?V4~R%Z9>6eZ_Gz9a#FQkAj!$VARp6X1m-D;AmwF@J@B7lrA?h`O_>; zHvgc1meM3{)>ShY{K=ChJiE$%dK$y|N64x_r{x!zT2TEaZ`?lO6>D0jL3S5yq3n+) z%+It1`3?(eFIk1g6(5*gi75=un#AI#c;TQ5EqJu_2&UhOWt%I_;KYGUjKvzxtD80D zPPrxw`KSTb!b4`3bqeiS8NP7fF?Z}I-1Jm~Vh=Q77)#-%O*Dbj!`-p&-gZH#?Tq0U zCt={*qr&i~fw;iNiZY&-VSR};&NcqZb~`-4k?F}S(z*%TuRmatNAWCQ?-6nyrli~E zB0P$FjMd?4^m3;y)z06|?Dad*$G>}7#o6AtPxmHvi+6`5o!&C%oKu3zJzp@*yv>>3 zQc+yHPSn)$Tlnp-0t0vFutyPTsO@b+--fl|5_X!^y2P;Hk&}f@ZYNnxmv+`^zBy!- zUBT#ljujm~g;Qs>pg~Lo>fSBGBa`fDft@e;u4%xP@jfidqDpu$)ry9C%20MFn@gKs zh7#f-W!;*LHFxW=a!fv2uH1)gMvxKZZA z%l^|4_urkxjqTo%PyO+VoEta5_YV@j;mBQhVr)tcSJcF*x2A%0z(Zyetw^)B4QF*{ zI*7yPEQf|KuRvaH49Al4A-Ks+lz((&4d)KRs#GI>Y_l(4!ER7k)CKBWdjc*^OQJ%< zQuw~QBfs@x4Rsi1!^4H`kXQEra+R-Q`;cES|9)q__t{Ro#Ah;>`EVEbb#fDHYjj}m zDNiwZpAz}0$)UW<0nn8W!d-9kL3iIfRuJ`@QvVpk-S%;mAdO`5@@!#rp%ri2RtCA= 
z?JO#BE0lX1ijwRrg3X}>dh;!rbk%gI>3IuC1p>leNu%dD9oIkGDM-IB|o^va$eJyKm%+3v_!58VOg)1N?iszg*i z^%m4E|A5>sh#Q|}CAwKylAA09W(A=o6Tz{1)<8I*DmE zO>nbeG8MNt(rKkrWO+vebe()yh~a9|H@^&{9$E6sZpDWFR{YV{&(Z=IN1e z?Sio=S+q_FmRO2+72k7RNDFGOe?{ADt;DM2B7*lfK>9g_g~jc{-N82e(B#=P^v4cZ z@Y!0N+G0k7$_#nkt_EVllQh;lO@)uuk+D`qBVqA8T~STr2=#ba1FtTY(7KuRG$~r~ zUhg-9Y)M@H+Dlid?nfoQ`&J68bz5P{=5Mt0yrUR~w`hucwa(S!f@Ebirz5l8UfkkEVIK2luAK8fh{Cmo zr*f?e67W=+EBGiGP3 zD$JYcRv&5+kKoXnA;IIF`XHzl03$7>-V5je?C zFIE-#8)XM;@+TKOXKmpbIN*sLXzjbhVtWi0OhP>%-F6EnIXm5dc!nd3ZD>KEqcyd? z3t`p^+F17@RTy~Y39Ac{qPT23N_XuM-VaiS4a#4UGm&7h`BN^iYyvx?i1gNJNAkV#Vgb4N^Orip*Jrm!LC^-BvYr+R}tL7f&4e1P(Aw)wqm zZE*Iq^$O~hfMZ;qp-;aOh2iKyrc)}pt>X=0)DTP1F`S7Bsx6%O&J@1ac+p>#KR70& zjCu6!NHXOZ|08SFLABQ&OvsBBR=3CD$f!BE{j!`jH5+m3?#fwp6$$_^*jK;1XnU=W zbyl{5R`WpClx9!suJ2$g$GYPp={z<)M2qe|S&XuPy)5|faUrdu0N?#-#L+hDKpiyb z!LkCDwy=_UeT-xc&mJ?$%Mdo^<|WM9U1Fg?6YHwzPVjbX#ltYKLHjwc2qM&&xO-PL~!2W7ZtmIk|9`Wr$ zrdo5D&W=;e&~rYH>g5T&Lao`fEzXdz_l)qi!$CA{W_YcyC&(SL^GEhGqKNRfSoCm* z5H!Y`n!9(TnGZTDGPoctnZBRB>-CaJMxM!&{kZQpAVwE1T33m|IgOEWG1FeUtXF!1mK%F_0K%n9BcOSu?7@O z`oXMvIzvF)Bmdcnz8E|ug6p_Rh6S3ftjnyUX!zBRj!IO(SSCZ=UG8Y=%bQds;D z3jcT*Blj6od6y!X?5QQzO(-J4(Sg?Xci@MH?t-R|tMTrK*KoqQ7_Lr!L5Wt)P;vh; zY39qY^Xjg=dh;KWsBGavHEcz;I+0?OZ&$s2~{oy5MJNJI@a!|eU}>H zZ5IpPC!nj?W6pbUGjs$t&n(iZ3#|%ZutBm;S_n+LcH1a8YTAp4U)C?;*TOl3(Ia0EYW)aeeU`4mowVv#r#op z@Zl@a_1nls_wB@YTBam=$L^>1s#^R8VJ4UcOQ`RdbFk#XVHhgjp+JjXbn~Vsh3Ke> z!$SAb5WlW`HX4YU1NY$AU`z2;Z%_V@f`hOl?))@WsaXH<58Vz^<*VhcqM5FiXg_x= zb(m|*r>}oQvHLt&^T%%Rbh(nac%cb@X88`f**caaa69iqo+YnS*GkSB#^Swe>U`bu zbIh=8EI3+S0OxVq6gqkXN%NO5m&Nw{xYhPBwud^;7apNsPMV_h)ne$qN?rW5Y8DkL z>4_J*ZKa@0Z3?*C#CF{*ptU7h{Ig{SJZn|rzs$7Ycg@a*r#CD_XQ$5e;)w^eq?AII zs~P+L&_&E1b)K4>TN&4E10NPwQqkK%(0Pz1U$)v_3~cEP50Wa#c)&G^@25w#87+G4M$I$G za_&NJ26PhT*LH&W>qPi)L5-ia-cnrB`6-;zaNr})SdiC|A1ol%gsW(}#)=<*A^Dyn zPI|{pjO^c^V)r`WxSTEE>95b%Umgu+7nOML$sd4@KZPz4E~5Org%GyBgk`MQO48-A z@NJGE-^1}F)%J>@yY?1h$jFb1*Lg;E_MQ0ZGZtvR@FT5W<0IN%^`>d5U3j;7R#Z6o zFz96E((5m`Xs5R>KY08W>aBH%)r>MPfIb7g6 
zikHXhKocL- zbv-e=DjS5AqhS1i>%`ue@FoX!c%KL_{(`N$IB%>CKSt#oB=cvuUolM-(eQ=rTW8Wc z@f&3Ja}cxb)WyC-gQ%_DhXM;6nWSk-o=1fM=frNjm%SMV$y}h*^$udwCNmxO2?EzUd`bSK@1J_i{_>T`SLD+_N zRuprMO240hvIr^P>5L=)=Bp>)p)isp)31_s(j&!ORO6HFYB}BIPsuiG2k_VTk)!54 z7&-b73=$1_Nn-r=4=+i2q6epwVCvCa}4t({}T&}I3DsYxA zqW9C2U@cukv85(V`e`+cG`j=^s;XlBnvqmgS&QGay!dt(W%2t%b$-lTiC8$CsK@!E zu=atbnBeS2tCt-E_qJ~QlGBXQ%04Df*DlbSFNt!A2}lxYJX$jiCMX3DSv*2HOr-pt+Mj*!)ozCqC5W+oHxm z-J~t}SEj@d*SkZLyEnjg4Mh{0J)pZYbj7#6DO4Tafi{GVB8y9%_)w=uWEs~&WlEZS zN2N>(d0Gjx^Sr2T+bvf8VFgyLR}*b4Q)zf*cW6s>7xykrBT26SF6GT>3cukYwjPaz zeW&+=)XiCeWpiPOr6SQ;Q9$cHnv0wC55ZSeCy{oA(v}Cl{Pax+U_zhc;HvtUI)88! z*Y+{x&&4~7v2K;PYD+1}=2viy<2vx)s76~DU8)+ zEK&In_R>4UNrc+o#g95+YW#9`>$efu4m^im*_zCIiW{wg&Tzi>R@Q6LOH@+iKshIE zsAlSEwy55YUS&E_?4_~nQDa9?)^VljJ~=E%|E`dbEM+Q{u3);_m6e~0W#%svaovS| z?1X$P`Mhbo6Na>)4NjnhjL~--~wt{HM6>hi?PFYN1DAd3ni+> zTw#wG_E*{s1}MzlZ5QmJ#Iqls;*Md!;;-C@E9Q_~GmXno6rrWQChYTiJNVUm8Lpdp zj!8Yg3KPB`Wp}mfS?gj)R^Mq9i!Hh#^il~x`vZMg#({S5N;N5b9u6gJ(Uja8Sf7RIG-X4A`GvfyO~Lbt(=AbAk#_eixEElgCv(NV@UG_0Xj z9L)~&*8si8Y7o)+4+igu%Wu^>hxKW3Z17S8bTo{|#3KipY9DJ{ekYB^Y3#(0Mhds= z$!LYTekcU?Y!d3LJ=nD+s*pSU4?g|n0!1-HFkRJ($>%R;qnGYS`x;YlJuV@jE8Lr;dAuusb8(RFJpGafS^rJ-`+@)2wHxw!*KY`$|kqdu{j!MboJ z(*dexy<}Hz-@t(EU~WTC3zEN8+^bXWVesU}{OuD@uySZ%TAMxT#-m;M@R}`!?=-;Y z8(uS$*jfxZLQGy9?_Xz~&s`j=M$4r~nD@OC?1zCi#B_9}RcEc>$#e@^lJ=e*ym}nX z?A4%7vX?vB+Z}2zM=<3{W$a`J&3{pECpQ&#|2OsaA3?YO?%FL(dl-D)5ko&@N7;MsooLED8_33TLl_cIp7r8=VH z@*>cEoXARKt`vJCliJ%dDQr&-ax3iU-pS6q>Ou=@e%wH#uAirYPLE-1>LuF$rK4CE zJpb1S3y(H$(WdIBpQw%O$pUkneLoGs?D$lX>eD{*;ok|M|<#XhnlGMPCM)P z+KZ12+6ga5x%1UGcXET>?xSB|G$mh+!rlW7_{Us6{T`H08pkbox9!6qcSr{@r^9Qi zt~Ua$zUlNr`7eDwovo;$KqFfgaPKFJ z_E(1Fp>DuU<3Xd?h2Ioe0o|Ut@*4V@ybs!mPv5xn<*VMq7S$glIjAQ}^5$}~{hNiE zv+enlVquOGjVug=s{#BNJIRnz$jxfpn4d8ryCiI%0i@qZ~#JhUdV$i@; z7Pe7C+z<(%+Ci~WtmQEqJ;XxXbhsL{SIq$VhYPfOk}dzFb~n5{(TV50c*-#UMg2Q? 
zg6!n&{5=lt{M~pJQSSbLMK)@RnzbrWdg(97`hP&L>(ekzvVbyDF2R(cPwB6L7TvO+M@NhXoat62b&sOpzGV$6gEBxPE7klnP(SLq`E!DMk$M?>4#X;;)&R+)>TZ7 z@)ea^?*qj;h_dOga5C+JaT89_#Sk5F>ajTx+V+Pn+7%Dv_6KtMAdJXW_!tpqsa3B( zoDVP%v$Jx@_J@!7AZZ^&r2mCUf|mHPv>o)HmP4Y-dC+*QXwY0+1<^`9Kubdeg5Z`5=uXnlNA$Wd^&5>tatskl|w%eR>)MH;-Z4^RpnWj0jlJGK(a=eVkY>C@4^!xx$hQkmm-78?U=>-&VB}| zZH)wGBDmeqqFnEz?A`2KkO_(|wy+)7?(Zt@*0B=DYTApp2jtM|X>x^ziYB@9BCJ{Z z5G>qGc^fkWzI&QAKX~IP%6bupg7Q$3Wxw*@>;IeDo_mT5toD=qNJsACNqb%`RE>Xg z`Vie|br373yYd-#kAPoQDG0YZ!7=Z{bnN+FXjxfKvx?(LQf0u!{#B-|8@q9Xstes3 zRZPF9If>63=aX|cZ~FP%L3B8hPP#H(Mc$Pv1g!EQX-OJ4!K{@|xZQvm`_=exb4~cz zPf5(b|B!sSIP$ZR!Ad0)v73tuZ#DEP$%pl!&&5vsOjiTGV8}|k@t~u4`iqU&K~;l) z-=r;SxB7zrMP*)i=1cZFKU3zkzElxD5vI=%p}UP%;`2QV$ZWi#?R3V9Ik#+qs^r&j zd8D^kzCVvj;wM4>y$eA5yoY!zYCmn7p(*NgenhRYgQ&Ar3k4?6qFb+`=w1C&81=0) ze`4-T#r$5G@2RaO_DYI_pf^k5@NRF>{Cxsk8?VGG@4E~Qmv2CNz7buXJCJrL=DqSX z0s3U+(co{RY1lATF|cbFQf==F#y>B^;E~2qurUQ*|9wPlL!D{zDLc{r#X;&c(ucpb z!iJZ8ndjdcIT?%=>4;8Gx`~Oyo`C$t8cw(sR$jCN>1=S6l*-m)BZ;^J|(zCDv9eiyWt z-D9q%=TZ9jjj-g>HckOSY z+h+^LRb6M>U;bhNL%y@YTBA@x2Km}t9+SwA`6Vs$W#?biviPxHEb6T)yS(Zg%aX1@ zne%Q;NOeNlpjGTw^?8(xoX>`am7$k`1@>={W1y-zll(|zrmkC1Qqx@sUDJ;RLuMouT60~Ho;;g>@a|3~ebYyfANiIa&^0|ji@n996^^XM@){;T3&fC9ZFs(5C+a@7 z;(RlQv#F*P7|&0{nAO$ztL{5{p)`(3#vI~w9Fti3hb-nB5QwhhPq8MQF^bQVjMWo2 zvtZA~n76u=wT(_-L0hA6gR=pnEGKn$UMb2W*}C7YEn%wW=e4Z?&TH*nF$JuJC6h~@6^WW|XO znB|ugbhi2?B)p2|Jkq}~U8DUhp!Z1(1Ak68(he2tc~SnZ3)=rt!7=tfP=2~tSnZd> zqE{p_`MJ6In!$Tfwa)=Ieav^39O}dc_w0tT*8*5%v@IUI{sPtAej%Lxi_0|*;B1E- z*pPgPWi8d?hX0<$vJUF4-`>ix#-OAVN_=oFJ5f8sy;dtdxI=RLOd zOl8WgS24`@7T!ExiHT=&nJoLJFfR21yE#EoJ3HAA&+mT5Oh=8wh@|T*sP|JLK)<6< zrH<(KrxiE3d}9kjelln-MZ+$SF}g62Rg@lLxrdGm66-v_l}83)_{84WYWzV^K2U_F zJFa7^#$Rr5ukM&!ZX>MQyOV87%0S;!%}jgxZD#C~qR5)fSo9qOO#d0gytYl`0ymJGzhl;T23M|?vD&32?8UicrnPWC8-8p7Yg^xs zjVq4FeQ)=o?0th!_iQaoet(jaEOo{gIh$DSoNwF<8j4|YN4VnJ&Mf}2BUZdTqfqCq z-0UNp(8IF;Qzy-40V%tLikCZ4b?sElDm2Aj&ocx`tNe1^a5bGu*E&C5d8K2;34 zL9F%LEiUp?I#%b`3er2=)QBfnWdQyGi=o63r3 
z#o`j_BMcsx$?YmQ#9j<~g2F>(Om2OKMPz^zlyY&dJL1%>k6B6CGjvvM!KSi$L32tG zH|}==%ZN>2mrWJ8IA1Y$zSsNfjQNAy#~L17yOaJ!a4Old)imO<5k}Vl@7z1`6<2-u)#k!2C7`S0C=M^jCH1lq7$&vwFK(u*& z#oR|+qUjElbyM)wh__tJlk-fLCUW87L8xxNA5-hMvZ*%`FkrhjH~Z;e)crDuiyCK- zH$BdCZn-P;=GHtZD6 zUho)e^K8*=axZ4uyiIsw^$Ja|RtZ@?GGUzCYUW*i4DVVWXLBF4vbJR|7-n*adAMw6 ztw)z&Npd9`{x!ts2X0~D`q4s^at*e%$e3H!PL%jM;ot=hsQxP*14eB1uUNU8ZNHR< zEA_{sZkvdi7Aa^veLwaetj^kY-)04mp0I!;tNi7oOt9J}T+lU%#=r$9neDAz%ry6y zAk)~J*EC@|T0148+_hMUvHQWQZ}k&G-FFMQ(1(p2^$sh#Mze8^Ls@y(e0FR}6ZR_D z%;u&=U~q{)mvCqgH}2pa9GG-~`5kIRosb07FO6V!^h2LB7-}wN0zBNO1 z<1h@I`wkKkUXk5?&a9|8?v~_~968kH!KbAx1v%XoTp{a%q6xvme5{YNGJ4?mrh)vA z>5YQB!-D^K%#z>v$$=l=Zq2(oOoS2N(_qVMz+%fD)JyDz_Wo^jL_#BKOnprzYe}EI=)@-|dPk>|7ykfY9rYt4AhdESAS<@f|N-z}bcbKR~5j(0k<3l}@ED*N4_f1Nq+TJxHw%^Z#m z+i$}ulO{|!rcJI!nZlwLbzbb{iBnfwvubHyAnj8=S>q_|lb))u$4}d`UJ_|uefvJV zwcVU;+*O5=?gNG67$x4EO7P^iR=6 zz_&LcFC&OPIOf8So@LJl9GOZsG}!W-j5an{T>&2JwL>O5xiQ8FtRKeZW|pNt;iouO2e_$Nod<>#V=fG$%=fP zZ~DA_h;94YVP2F9%S|rASBg@6$ci&ey<#=}P-@5~&$LF%3EI4`$WGEpuF9YAD%LZJ@nFvEoe~Z-}~*ui8Y=7nBUINYW4fK`t~1yTWk!p(Jg% zA@MD9BBJ|_jNSMc&Okh!$OaMOA@!4L`x$^=?=6xvd>YAHvzEr@2#yL2(Qq5f>`zUh=apkgm)cFb#ma?*n$Mz_lkKj?!sE2xi+GK}4K zo7tjmPwk2y(xyyh6g~J+P#<7R^9F};Our%O$PK`}$q%`&ldU07%K)dZ9|#re+{t`Z zH#IS3F!t6GE?Xopr8oUU*d+?9wH3m)pY6>AR`qaGy_Te+qN* za~?Tzs+H`1Qb$*38KUHD2@@J0Kx{^?qZiVCQ4zC-JF(vY9B4Tiam5AK&cX<;j-5q& z7g>|(evYsueHfAW5$5jDIzsn)z?{D}5z(%{$uK)*2u+M7Yu{)=ztEZ({QS-s`RpUl zip@cEf_RG>vu`aPYz3KLmeACa2r8;Jy_I}jlU5uKU>y1u(uuR{Nb=DN?#jtzDkqrl z0#|<_p-aDURZYK%QCSRmbk72d`X6$7UC-%mBR8zcJWfSU0VGW-pX+^{O;3EgN5!IL zBsAqTlg10~rP6OC(>R-+JtdHJ21}!7p*B{m+C@ZbSHF{H_e zbay=ABrjy>i7Q&L;3KB{HVl9(kZb@)J&5MG*s6oFr*Ru|6gxZ8Y+c zgoN0AV>X?W!tcBb4iFB28@9P&sOcMl1vwD9a+NVm=?L*{5iwyO^XUFuC-51mgx>kZ zq}wKp1X+wHsd1LHyWk?z9o5EMtgwc_Z~7>)+gzA)H;tqP#!=smD+?d!W|O(ojWFBJ z5xdvvl9tUk>8QOHbV{W%))(&bVN5D%b)^%%?$l2TitCBL<}xa~cN7gRtmRrVRM2J4 zAW)dTkUn=9PC~96=Wf;OqlI)B9pvu}MFD<3%bs1L^S2$L14jzar8zQ@9@ZHCb^tEO 
zHOGl>ACWmFbIE%j2kdst;xbN}VWf05U6GMZE!+0edhg{#>Wl)of&=L-nMdxY$zY}> zM}pt>(6muw$>a(8|0r=WwHF@zmrLCL^e!$s=?oQT&BeUcy ztb6RiOS@da04YQMnXEo*XywjIcl08^H52vB)p?OWi(}p>@F8BeiD!vCPBj)ho!(mb z_k2%YJ;0canz0I3p6tQa*_%Li|6kDYwH~r2Pl0s@6nQ3pF^s$O9BY?;M)w8Z(OXKL zCl?L)=p{FB8=nUE@7wan6J6O*Gi`o|XDNg)QiEjjnrsWIg#Pj?s1xW3N9MnRtXF_? zsg8V2x-;*xUzUw)S_5{Y?~%a3Ur;hi_}+tVeDv-c^s&1NzaV!YUv>RDDkMH91NF36 zkNKMHLlqA`Xp1L^EVB#8j&|Tp-ZkK-m}+d7pGst53`E3f@X-g8Fhc(bzCs=(UvDs? zkFhj0#vBGVyYl5P2gBHeQ&hNPL0Y;DhTc%-A6+nGPhZ}JfqUM--d*}^o?0axXQIca zEiEBAdPY3Ui1{O7m!PR95wr@^A^e3UIkI;bG(Y#l_!-|YBbBGJHkz3CdoxYQnnKq) zy0CZr2C)O}ta!W65h!|;fF>RxaKJYjm*(%rce8Kdj!13xXP`9iY>*jqR7YGD#9;EW!S*D-H>?0giTnih&!e!v63CHacBD}P|AA& z;T7Jrti+BlxRVd{D<)B~jWJB{uwqx0=VG9K7e0JCh#m3uCFor2#mJO1IJavMfBw&T z@Ga0Lb2m7#pRn~p!{n)261JG1qn(V#Wv1DFS?vg1!L z#CgXu(R_+Ce?qGWHzrH)?#Mzsn4JzL-imns$5ZqkSq+^l_1W`Zcfq1PR{Y~ZS)f&Q z5{}Gmf{+K+bVryqe=M7amlaa%?RT1N+t&h^nJmqE%Xsi9F}kd8z8qH+^@>sO?Iuki z$FHd@1=YnS>?)f$XnuT(Tr>CNGnO5of&L#cVCF`w&k+lo&86@q!H73l|BDp;j$z6g z62VUIGg0wZ=l$L$V1dhZnDOTl5=TqcbC)9?jEI5IJzHUi;O;7O35Mram*8E&sT7>| z4&Lx;{Pn$p`tYa|JR4@kuFE%P3+q(aXpe!oHBX(rWhq>XerBOVnH>M`9WWbR?AHCK zO5DE^Q?QyOWLz>K__AJdrjz)sjtKbLfc6MdaR4PmF7Fq2anET&l$v8dQIV8NcHciH?jTTfZ2? 
zhJuHn)=Y(~g{H$6(^!Dhw;b&T!dd%+N`rjjlH5X=K@CYIjPWbnm&$dE1#{ z(;c3qs9Hcm$Teo9{%vZdc!;dI`i82zT&4jQ>zGZ8G%k8MQATvvIMH(Ru52dBSH9~{bcYP@g z-{L~sn>KK5jg4T)f4VX73msDIc{awYN<4Tu^}hZKnM z;NEPK=Jtn+i2O`5H~b=ZpS~v9vQDs3`3(L0el1<8(L!g%W)rVJ4y0}0Z&G*XGP(Uz zFbp|A;j$i8Q4=L!^5Feb`rE(+bvL`h)OAuYWWocIl(&!+HF_1sPkKtE^UqSx3L}~s zcZTY(HiO$iCfJ-5$7y7IrW*r|pz4MKmUS#7&KKX1>#B|9;9O6fx55}g>&(gJ+-D@W z_7?p(?E)>Uw8H1Hi-=9YNAgHJi;A`z7i_zhLN*_HO^^Fofr{!rs=d+-zmE|}DuWYA zcWnlfI^Bk(4O_#N-dsc)7dR5HcsJ6Ye~OfRIYxqIdxeE<9Z^#heAxaSBx{E%mIdD9 z-sb3HH{V6UM~o3>-|4w8H|VkNFG*Z3OAA({Qj;CpRAe>dR+?oPsgu+bS7t859?^oU zM^q6z;>f7tJ*4dKAGEQ_n4DQ@0lq7YaE_uH+0g2O745IMcZ+Sn^WzFKcEL}k<-Qfp zFR_L3jmmiWNe!8BT%hIbujZOPO>s=UEIzW=fH`YclfP%`;Vu(f^tdw!EDAo6$4PED z?}sZ?;|ESsao%Uz_KCEse=2pA%;0p7X+!v>@mzvNA9H=?N#ZLIh;jtVw4V48Y0j0R zK@av4*YWPuJlhCnJl6+F#X8P)*dgxQLlxNKwt$|~bcRm~1JOvSf@FN_<%-_l@G)5R znsZJlqInAqnDD$_PTc;haO7TXTp26K$%560*XYgE^_x4v6|UHwlE~R9Utqj@ev{Co zD5mT$LoB$!6Ftrlk6B7+TKY_y*DsiMoBFwbU*f8DrltLtOWgnTF3w-73Yk$0A^PA6 z3YRKjr|2gbtSlvEU60Xca5>t_+Or2-?%=S?Zv4|&2R_y50>JZ8kdUG4Z-I8Uh&!{w#ihKPFv*hgLoK^mmS+$%;a|%Z;Q$NuF?T zyKn$&#AZP!>gAR|QiwVKLc^2~I6s44mDz-&)(dPk{aMg6vIFx@M0Z)?6FpQw~7aG z@j0f+O^a`9&IBdR6i_m9;IpRa;@tx`An;5+6ioaEtJ*3cU!w`H@ATvksb55exH6i= zheP{gSxo9MMZd_G(EPd&^PIbB^hrDH>(uAvGj!O$OqJOq{sZ~%?N+RbsvMA2XHaeU z0pYe^hmC!Mz)7JAO=7)aYnnBybNDfKk2!|NOXS#uujfcox-3|YsKu4rMXb|LAAGPg z4T_>4&>`vPAYk7ddj4Pm1Pwn&Tw8-6rbml48NU&q{n24fBkQqxMi_*PO?g+B)l?&Y zxIk7`VQn^L2>#?N9fzBMnOhI z9Gs|BWBuNK1nx%+RCws(o9XK8s9qnCH@%0eQJYon*JBk%dO_&7!MKg-5OhJa$PTsy z|4cZ8z10gKeC21nE>`0Y*(>m(oOo_*RW|w2bO(Kgn6VL?gJ4GUU8LVl*ra?l__D-+ zCl&(Dxx<>BHM|e(SIM*0t;4bNxFPQydLAB?M5ErtdI-5)Nshgh;VYi)$B6?D!Ru}d zR&*hZ*3xb~li|XyPrU+*Zp}cu$jh{4-x1IlGaRkv*zrZv)^i~*vgy;_Hf&R<#^>7y zq5k{V7`>~7RwaIfxLfA%%hQt$oAw*t>07bIH5#mF*mxhkTOW}snDgeXX1u6tb>ZYd z6Mm+H8LJn03KG3F*uQ^jv8&G}KVB=ppJh#~qpN!FD^FH{%&6`8PY1ZFRTlxr9`@$YG>aBMb`?;BkOtKUy@GxsH9 zW%?Jb_p-(iLs{l{p%tX)@20P0c#`Iq$&4B{loolHF+qFeg^uYI6SCkYx3K*&B3=(l?3m~At~c*AY zC-H?8h|bz7+Ym|8A4cRmk&aP!hlqd! 
z#Au5Q`Uh%Z-uN!2?41cdALm52&oRa{<-9`C-{zR~>MP9*{6zAV4d8lHBR&3PC%uu| zN{_^Kkbs}I^q|dsQnbdUkVflZl>b0#*XfEEy$l4QvLo%wy>!V5bN-lP zcSa(Usql@I`BQGSU``MnPV@GNEvC7}=V=KZAzhEnXvN+@y5c}6alhO`E?E>&v5x`G z^E<_@pq|v`#uxJY=q}o~?hy5TIggvBBZ~&7-RQF+Jl&K3l2GqKP+KiSPOfbu^;_=I zLA^?7)FVQL;4CixmnZl}Oy67xt4be1X7qOv{6zDF& z{4vUMu(0p^Q@CH%5UO%c2(pb&^pLI{mWW@_1ntF~)i!~?^I|uVy5b7r_fMI?Q~ktD zb~nj%yGr&Sv4P0;vl^4ephC zSA(&5E6<8A`*et2@z>?M=F34fc}n#peIS-)NPC1QeG#X^YFH12D`WK7V3#NyIm(@F zv>pIj%B8dC0e7B)p#3*58ndL z&vHQGXA2hvRc`X4^Ymm-G1dpa<;ID0*x4$!Y}dFr6t@qBF_Sb{yZwb|lldI?gvH|8 zM<(owVNwVVccHRJP;tJE219QTo{0s>(vri~!ZlcyACA&~pP*#GMMw*?1krsR8t&6c z7JU4M7aUHZ>zWX{dbldzkmtl(iUzZibw7P}s+YnpHFbXSJRROu@FI;^{}MVhb@ScirgBti{DnWtVxT0`BAzc%l^9fJJq-O?1oGY(XF&odzeFPmFA(Y{7BepV{$Hy~nfGuyv-u!Ttw5yLH zirG%=QpE>&`1~N2FLPu!x-{UFfmhI2SDFnmFy$90U4?a}((L_28CE1)>Ma=*2Xf4P z=!!Gu_0Nd-&4r%4Y?oV}eJ34Z1=zo=38o9`;E93dut0MVZ}z+fK3T_Nm&tS-SZcz4D|?PN)vuCw zZkBv@w3z)mEeji0cjL^+gnx8#{>}qp{!=CHUx_I=THOlz^#u5c)iSD4Y>!PNACto# z9CgUZB?dpPQ1R3oOys>bCQvYw{Fy2Zo7;9#sXPzN8CO7)&G&PzHXFGK0WtJXR|55N zvZdX}Ce!0ANPGq={zp$0A$+$;+qBNnRY&-YG zNCxuyG&onInM7jhSm?V!T-dBVl+0T8k=9NhLEoro0PkrAcG$?Ime^5u;|?<2*Awh3 zzSE4FcU0q39t{qzB=sV7#z4t}DqP~2!?)!jcZnIg4VQ)ooGE^LC5K~W^0^J)tx?<@ zOH3w?CuOonxYLn#@Tf9|o=aB7ga=Ms=TBwmKCM7j4L?JIee?xuq6R~$8AR`lC)3)7 z2x(&#obQ8FE?m`%p4|{Z?mAu;jL2C) zZ4BG+kYvah(&CCi(EeaHnPT&TJm`K*E=R=^oqP}aO}I`<&C~_WOD2$VSR1#SSJIU2 zZZIxU3y!R5AkEe)q&?&jH>dXsZ3(R-!$uB-(&1a^`A`wcM%mFNFCLS}@fB3T&IY}X z=Tq#NPDOty;;hge#A1^+F4^jWen+y%&~jru@kFqGeo|nzeKAChPHmhX?Fh&3CehBr z3SjI^Y2w+p^tVbXg=5#rPZ>|-W-le8sO8MIWqo89rqdsP?;=6IQw725eJ<~@I_L1^ z6;YV%N*8q35HFimB=GP>Vi()R_+%fT>%497x3LiNqe3^1J+B(L8Eda z?Qx7u`kX+SN*g@>N*ji<@ifHZIo)LakW_dc=WaZ+1@YhsWauq@EN>q~Z2kJb5x%Xo&1e#n4``& zW@@0gWIB9Hdx^_ch19G^R_I^86gKqMp}dJIYxeye3_C0A!FEV+&NnySci9{wsqo|e zT6hxct^8@<9A~~^j~y?0^qFbhzYZfB&LdvRg}=2DF<3{N-#Wm9J-^4CALlZVl}Hcf z+{bUilskItkvpYu;=ZuQ_`ZdRwrAtmfUD&0E)kz``VwtxI05`sGq!b#9N+i!6{f9z z$?ZryhtuNQ@SeIQAC`X%eP^ykEs!FlPG=IbDPtxp^4Yd$6TAFW(_yIG2y0V{ZZTZ|yhmn~a 
zkHLLbeCK|FQLxB_&XJbp6^Bze)?-+6PMW_vwF%@!R_xy8Dy+7J276<-HEVTB9p)@n5?Es!5FEcj zpiCLe+w=^1jb@bObo#95w-MMo gG6;=(pD4Yg@z|}?vcNw^_(zY6Wwa$1*m?@;b zZJiA3g*t@v-(Z?fynyYFLK5H2e!@L_k2Vm)=-smh0SDe_YbORy@d7=k8hLG5ZKX!gk>HE})I(oqTb zG%9iI`BIvHT7?y_X`;hxOxbzvw=j){(GQ*Wtb4g5zxa$UzwxyNuj8MMSL9{cqJ5ee zY;g|dGS7gT(-++4bQ}f-E3s!aW`jdW6XfhW3JN?&0yZk}<<288PSA~hIz52BQfke+ zX`I8rsDn_(OF_EVZ#+EJoiE&{#Fy=hC%$36n6OWluJO4A*FVR?W8Yf*cw#p$YTSjQ z&&z$XQ|L`w1mjgcp(zy8IZX$a=9GQ}_^I=N1K&{&kBa3qGCL-}}kvCNZdNx=xeFzhnwl zHIw${;moO{x9G|@XK9g?5>Xza2Q8_W$XNQFhTB?E6=yL~bu)%wolE3_b3gI?Qcb-b zj?wI98Sr_VK)?3Y(kb`v5UVM2r2b|zmo}up=W@*nGG6N&9q4g^zB?cbb}m1-B&}t% zsYZxOtGUgH4$68zj5LNCIqm;-24lRowJ^o&Y@jgE3cRPb)BAT4Xq?~=Nxk}xUYz@p zUYT7)f2gz&@9cWIwap!N9&^Wo^}C6IbsYCgKpL0!y5PyPH|f$vn$UP{0`a+{4eGIG zaN?*du(bwI_C$x4Jby>J`__}*E$ZY6s$=;;U-H`b8g*z1rVo>KAaD72E>&(d-4|>I zlIsQDUAB9P2%?60?hPeb12&OwcW=>?7QqXiFkP$Ky?08>86 zLxz?Rm&T1JQO|tomxE?-a^XQDyZjH0rct!<^#I~)6khn?Jx7Yx2XXzj1+<$h=YHJ! zL=+S!(wI^9=H=^rsH->MEnLQa#L}=MrY%rd%2*xk%DzHs`-hh4|W8P+f&b zwEliPIg4`zLa`ta)w@bYiiFSDlo9mwO?Ples-+P%an!Iz8NR>q1c~0w!bw?z0MAm7 z6y)cT^on9CX2<;FoLZ(I`sDxU+x*|wxBt+;{df8{{~MTs*T1>&6@gXM?M*pa7%aeX za;og}?%TL2MT-|T9OFVB2w7!@hFC6;V9eG(!y@C~TM7|(BW;bNV6YnEZA3ZU9bU^SnWPD_EldWY+LmaCc6AV-|u09*W@ma zTN{PZZ(iWibQyO2#X9H|lyP|uUoqy3h(EG79diCOKmfJHan0KNaHkV^*=IWb8Rfu+ z4R`{JQ*8M8Hy!!LZ2@SkAtY3M*oi|_ba~m3DD0T-%)86IM)eULsBjcu?1&Vs7M{B< z`%yUamNefU8b{9zmf>SHUSi;oH?Xe$D%#DINBy-LY;#I1wMq$triffv-loac=t;35 z=PH@0Zx4_5Dbu3#s0bh|(d0W1A64Vn|(An#KxwPaOUU5P6jQ=`tV3hIG&WnEJA=rprN z$XQdotj&r_d1kx5H6OOO5q$shWs(OhfCkM%$lSXR`~RvG6mDra!sjx?D;C0LD-Twm z2z{vSTe4>9TNv&n&#qV_4GgaZnKL%vyiHE5hVo%}Fzg<#uR8%{=dMxTHEJOHNDJRA zuwj|CDt!I-vrM|962CTAosD^VUtm8dkcsAR;rByB)?l3uEI(??HifLl=#{(4oRL1z zfBO)YzE$8`wVq(ek0NqpP`@C$(`Gfx?b!Mk#Y|deA0wzIc{go}HiZIP@30y#DP6FIeiJ-}EVCbXTX3TFarijFo=qqsC<(aBwJovX<9&Kz;%N>? 
zoZE!;mYT%1-I3lcu7<5A&tarpB1km8FmpF(vwr8UgVy!WV0$1J6BZ9AZ53ZpGQgik z2-$G&-deLOdj7`ky4rl4nKbT_b7pS_y(WQ{$Kd8Up@(SXAR{%Ih7Nv5>ehY4t)4A7 zGR2733#x|XmHTMyYGISPUyJv>p~(0qo6?g)I&$;EN}`dvh@Ol&g31$ASvAw|P~TBQ z>rNeqg2w$QI;K;2a?d63>NAEjOX4wQ#XF2G)8s#%cHjfTSSqs5@oBGJ0?zkp;LiwU zUKR{tSgsvg^s|zwnQ6!0Ry1L|ix1O=O%#uf>;g04mh1lF5YAP6f_V#OQ2*3nsIIve zopy}Gr1?`I{>4KunjD6k@8n{$<9;GJu^4ksc4J=YKt4gQhK3{##JpAdWXax22tB-y z+~yV7rxCXyGqWAlW);J1ojs`1se<-hGOSm~IJhi)t@?QmH90v0=1hHo&F>e(GFM~v z=xtfPXj>gqB;`f^`Ygq|c0ZxM%W`O}zcxQ}Nj>fodi@_`&*6fCckowT5sC!fNWI$# z2)_DMIA%6Lr1M&^*<#P1PsoRG-`f~AD-)~p`e>WSBajqqX2$>e4$rta)HoPPUwaN< zeFYz+O2kZXDZh$i>o(w)lNQkDtIB>ou>%t<=Mn20F6>=>S>APt5^M_k3GJQ>Y0@`! ze%r^L_;}vm1UkRQE@^HXjGgkCHnyWAw1neLw zUWqM)>RleVEh`SHja{JrUJd^ z=qF*2=0)oJ17Q3hM?NMc9^>?^crX{(Kg#ZGP!5Y7A2k4^G9V z{K@C}5IF80tXyoz{s_1NGE-&wX_rQ#+@e#k$$K|!y{p3JmszoS_2)?Xp#897rWv1J zYrv0*RAyJ0XrpAqP$qB4Nj#Zz4DKno@-Bm|*!2Z}(0+{I7rL#;$ec7|r%jZ_^LKI} zX|cfKA8*XB&AE<}Abpy=qY*o7vf-!u0ABB7FVwf_k@>ZTtWjSZOserTxNC&#?kY{YJ+5s{=0;=DPHbMQ};6yGIVUJGh#_knzRhAS_2>7lCA zP1*WowTz9xu9&myFqj{9Vl8yPz^h5sVBTxWM>icMu5T}4NO>~O7P1&mx2ds@pLM{X z7z;Mn+83VZdDQM`sXO{c;I)`b1he+@kC2k_bV zlJLMzQ}()=CO^Vlo*)19(Ld(5(KBuS|1JD~plGojHQkfUXSb4;sf2j2e=!{@uME*=9f(}eFLK?di+mpYl3Ij)pmi3)bZ1pZW{$Rk z*nu+mW4RPQF|~&uM=q0mVG0+eEx1)$VoUW?9bu>R9#Wbxo%(ie_i4|mVTNTZfXRS3 z+HPY`BU1(l&fd?|S4Pa#%(nuCr=8s7d)KH*dOV%`wv#qK98X(iCec?xXGmIZ4Lx!? znJ8673;ez0g4Hh+VN`)DIdDO+OSO733cItJ?9xIZelm%}wGlK&H;Xn+?LeYj$EjT{fl1Hdb6j6I~EsHN!0p6g}a}) ziUxBcdfn`EJ*S0m9}Kb4s+&$fsD`hcb4g@wACngp!pM7d(XkE-Xx=z^+9O*;Dh>}K zZKo

RClvZ@Getx;lYYpDkn(&CBTmM|Dh>HO6?qT{P%vH@9_w0zBSYOJ#2jCG)eL zVN*yYF|5>sAkUwS$rE4t!c_>a*^V@0m=PVY$OW73eIhTOIzV2_d2Zzo7j!VT#!b;? zxJxFD78bc;v9mGWggeZX{Z=?++I{li_(Rg`wuJoJrU4a;R**UK%upg1!>NsUL1zk` zUQuZevop&Ab2Rr5?TO});`@?r_R#~4U4)7!?JN|(sPmb3SOwkm6d+_3$6Wbrjd^Zr zq+cd# ziY~YOSTTj)ayUB13P#Btr7f}MsKWbDS*)T#+J_hg!@p?67Ihpw>Kbjh(Mk3we5NWM zTIe6vPR#~4(8$gV&UBhSaiVtcDvqlYuKqs~ zVT5rzv`n<)O=Oqjw7kD?wSygdLV}ps{Sw<|e~0AZVqRt9N^o7-PEN4;eEZJ7F=zS> z47xNHjLOqt#>P(6SQ7y0yAeXx^m6f52O+*R3wjT0!k_uNd`N3AQ_^q;R$PomuYEIN zwShbBDH#6yg(z>x2)UC(Q8uSFR1U zvvt@~ccG$I$}+hl9ob3OrTHz>2kT{vb^Z3EA`ytz#ovZW4lahF=*;M7#^a}Zd|C! zzT7QlJ()6WZ_^-($-*)H`9aiM{{cb@9+Js2!foTiL6|0F<*aNU$bJdDiF-|)Sl5dq zQ1nfUyEWaM9X?BnH5$}`2_>?8eUGqT@V6xj`g1`}Nabr=*$b8D3|YF#nUzEtaWm(8 zvaZ9{)13v~VB0Rk9?6JDyB=#)6J}bkKW9kuH3O`_e2i%ib`nUl+tFP}V|*UH3f2^q zfF$%cb8R7qWm#*$H)=DHa2?#4tbwe-mqy~6rOc}y6Xw5{T7v52C`JxBOk01;gD6Oz zvuG)W*z76@KdOli+nV9o4jVRipA0X)KLeU3WrJsQ2cDeY0{jahWBueq7_vx$qFPTM z>qK>S?U_ETU*5xLE2^_%ZH|f~Uy}v%4&mo~7q(}zDIfgSi0^+@4jl(JWB$)G=%=s2 zd#$@on~xr$tH$kvjQlZ>Hf;f!mHP$Swcg>Bb;f*=bQ+2;Y4e(CQtYVTmgqT84eK86 zfc53iu>GYU>0auI1s;0^P3{qTcWg0?iPYmu-imm$G2I}(A3*BXB!h@raIq6!WBR8i znDD`weOYbE8;jN0qL|6d=3rO8|JYZUJw{MwR1Bfjx_!iEdn8`}@e#gkv*8bJ?tl*c zN?4Qh3UbaJM8E4zxJ{!GF24JUmY;O^<_Htm7pTk5(=y=KeNM!zANFwlcP-4kWWav( zRA;+J#NepahVbHrBkL#i30)V-k*2T<0?jxIUWL`bn&2OJBg2~YWY^-T$unT{?HcSE z@(p}P3l^09J5hY$GiSQz0v65wirp=S*lVuEhi|=0c2}FS_H|-b?EH!B2)c|FfjLC( zMiC@6?1n?XJor}sSZEvd2^4w{Qd?_Rwj=i|e76*M@DBM*!Y3U_TW(04)SV${Za$fJ z$Ciap_Iys}J}5Cyg(;U$f!Kf{lE=l&&8h2f*Iju&HvR&5`97p-YwBU8jEKLy%8Yks z;z8na%Exq=1gF?5^8ppcRO0@zu>O(~hBRL$8QC>Np_{=!A~!bSfC!?W%A+mo%I0wd zlHIe&xL>=_Jv|s}Ee&Ak+w<7&Vn%xh=t82Pv)5X2224F$anZzL9Ij-_daez^E8k>! 
zr*Xj`BW=mny|U%YwMG0y(F{EEL4jXpavvowJGpEl1vbn5D0=Pq3slbdV)h6jx57NA z@RX7rJNLXJE1r=hynz%@dwD>&uYBpH-Wnm;>b?@WX8%4T8NXzh*-nX z#%#&Td(hwi2nuq)L9lEwCcLtzE7pqmG?_^_b(STYb;b$uHnSM5T!fw_T6{M%9LBv+ zX8oPFg2_)Ctl#vVyZRyvV-FmI$gL^lcvTyA@0|{dYoajWtOr)^lV$5VyWydu1uL+* zFxRCSlMl{Dzob54mirACqFveJxpJ)Eieun9^Ak;fCot7tH;{nG6G__gV7PI)0DBCQ zQ1|#CKH%g!+FND9PpRpEmN!SBNDxV;`!xz$e-Y101cvJ>4L-qb7?B+s1T9gIAlh1s zzqekUw^^77lb?C8S0_k8m*-|2CvZJdx2xld_QB};Lxz8+smG?t|STN?;zE zgwjoO(;;S~246bM10)Z+ae1YnwvrnFvL>Av8rX#9&gZb=xjs+V7_hAw=1|1PFi)pH zfg92`Y{~dN(5IltiUcmBv{@&+J;KPGw(I?wP ztoOBVAi|3@IcyLv4i;wbM0ehjm*yurw_kr>T zd8rQ={k(+M>j-9S$wN~AMh>Fgld!zdgb&{=1qr);(rVSyq}9;{^&S>O=sX3;yWL3s zz17Ecx{K|~ zzv3=AKRAx&nF%((-|<|6^e--=m!%Q2Ey<7Od!%F5TcYu21IKKRAWBQFkfO@>oMeC( zDWAKT)Zb8{0Xep`SJj`ChWw=M;d{syvo4bJK7nS85{UL1)4T$%)sO5)%+sKdlBbcJIy$yk4Ywl1DeO9gq9XzhoB zvT4mUZMg;6@?DXPjjv!tpO*=ciAqj1^^mtodp#}MUd$=X^(WJka%sq$`=mSLJkbz2 z6W8jCoUhD%E`E3lZH}xb;J=qH$+N3A`o*Ok8bHF&F^uHGFYi#< zIaIP*RS?k!6xucK=dwQDCEXWPh*xks6}c&SitsL8 zI-o@}_E*!QIt$X;8AJ?pMN~XSaF`u@U$EkP3gOzH&;?TtkhBphqL#M0;-4k!>L_ zNS^i^ns%b67%wcqGgXc&O(#y(d-~PO%I5{g$DXKteJSb|0Eh0)w$%% z8%$dEZLW5-6_u+KGSKA9IR62aByztw%{(Fetz}OXh;G|4We*-O3d8G&RgX2z`#pu3 zqE<{Y7S5(tdY#05-U0ITOBXF4=S?3hO`(z>vpE&hFJ#l*=d?&+3GGfvrA1?e+qF%O zV6*TnOokwm)-i

nrM~Ydzzs&o~kVPn`k5yFr{P~IcvCsY7E@N zFiJkexAOt1m3$-9ddsMa>P>3b;7UUE4>4s^Lg}>i>!|3yxzF_@ee|I24U$3RXxoTB zQh(+Y4cTeSWDLutEnjyNl~)loWMLTL?uki-Z#$8d|4ITvP?&~=n`4z0bM^Wt(tda_ zZ5;lSy1#lt5+;8oB`=GK@0u@k&#!ut))Pw(KIvSqIqwh(Wj(@ic7EhRK2@Pw+xq|7@t8#O>!WOlXuf2E>SeC?0n&-d3S_cE+;St zpE31*ZG|D#uL@l!nb1&eplAC!scS_b7cn-6hQCnfVik8$_pwzp_-YJwk`AF3voncT zeh$;*)cXH4cdkKETv;3+2nN9s2Sp`3bU=ti9vL4fNZ;F_EMY}gO4OR*3ydVZR0;`5 zB*}n?f{0T>l-D9FMtP|5fr!kU+e3k%u0d8;7!?qaAikn2;USE=ZDmt6J&xn#+xApV zSI>uE{XcY{xqZ(${{x~GBhb};6TT6YLXG4mSc=EP$)_7p@3wQ`VNpeSxA=Lj_h#cu zeJvy@9s}>e3M80U0XeJ>jZ0Jy>vs7Oa~a5d85M5KS2%? zPXqNl0GbPZQ1Iq%5dF288oj9k{KId;vzISWVB-?1dQlo!L|g#NnnEhGG639wf%5(e zxKeTnlzsCdHsTCg%uhtSmgZBPztSi&OC;KA07`5(PQ zWwu@hqvm}ea%qN5d0DKFZMv+7^|&7n%>~KCaH>?8MtL8Q$SkAV(Ao`6Fn0z6qD#}| zlAy83_l6ve?oc3M?kRa^K^2754#`~Z9YdKdE+}VVHI(KZL*AmRvN?1D_~jph?!941 zCHfIOt|dTeZJj*&Ng`S+0EA7tq560qn#A5uO8FDHcxN3YD#`(?{lm!PTN#q_(;;5gRfC==m@S(=X}68RoF5=XY=XK{>|XzwGBHKvX%O^Z0$nTxFKOX+!FwPqsAXaBq zkdU1MW3qUG}<@ zRluW@%}3LD3A5-itxj}C%URZy2a)UNrqc2I`fxOHWW>j7Nz?K+JWERO?#5BHpW`ah zENVwXR~&JCe=_+?-6SUPdOe)}5^&IR5iR*{f_%z!TPAtLjONR18E-IW|8IR|l{*$; zzwt%H?w7xl1x43zFFV~Nq(Fk&8CRMdyM$GPqsWokt5|x)8$>}xsC67WThYBAJ0De& z^^c}98`|cRTc5Y%gG;+e=s!ou?IZ_!jbslgtXf1GUSGg9P3Fv4W-7Dy9Q*F9a%1+) zFlLI^SYschG0nHn#K?6jeP7j1#Pwm_S(=P=_Tk@I=gB7HJ_QBMyrg)yhPC@pUjxu`45@$v+`~pZ)OId>rm5t-xig zr_m2U7;!e@gl=;s|zarm^IkTUTW9-`uKuA@DEzP<_PSUZyvF9Dr9 zB^%YHn$X)5M0E3&Jg6K1a^c^sAEabDQFCOQorPsny~e+VSr8dg67d=T0JjbFcq? 
zNDM5ezlDEK4Y8(l?iAoZ$UOF$8gkZ$txGp|0-ritIY%q|ow;*O)!gY<>f|cV_mg#L z=1!haC+95CO4e9Y`M4#7MSK+==e8*(QByV7^;w-A7oZPYmu{}(sycglpaxqX|5Z)t zT(?7Y{Ja%<;&rL#I*qC0t%7yn^^J{OmmhWf%e8vqb*bk%Y^dWqztDl#H#Ty++tu-P z;dc6`J{Q>|0 From fae529a90699db210f6d40df81b822f0f97a7136 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 12 Sep 2024 16:30:05 -0700 Subject: [PATCH 47/84] Rm sample adapter --- .../testdata/lora/sample_weights.onnx_adapter | Bin 37672 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 onnxruntime/test/testdata/lora/sample_weights.onnx_adapter diff --git a/onnxruntime/test/testdata/lora/sample_weights.onnx_adapter b/onnxruntime/test/testdata/lora/sample_weights.onnx_adapter deleted file mode 100644 index 99f5ab65b7de0cc1091c32b26162025c158b2da7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 37672 zcmb4r30O_--|kXr(p)Na4 zsf3VZN|MBzN|IEPgtPzWoNqYidcSjhXIaO6QB1KM!x(nMgIN6=^YJGf)v z&RW~6z_%)$qz_aPrnR{Vl8#P(+hQ%Dd~G9)S)?pzc$h(qi#0eN7=*JZCv4YM6%y9F z3cX&~36bU3AZ-%Cr#CEw&_itygf`4Sy#Pz6R)K2_!oIsb1s|JPSSEcM~7c!4BNmiLTvFY`?tLn~AS4a+DX z11*I5?$_x5cMmw(_8amp*CUtL%{T2CfjVb?ptMR2`=y!*-gygo>GoTst=B*ycx5F$ z-ZLC!9T&XRQ`?~LnQt(#!Bhy?dxreli_ox6gI(RO#H6nji1Wo*)TkZ@-Y}H-ZtfsK zn~tH^`1`2wV-OUbSAwhAN^DTDoxmOIK_tnkwB`A3@=ZgV4Y?~~+zNy{6|ERk`v!Zb zsSDCD6QW?44(0)gFnIS{_@QPcguZiTE&fVG$6+FP+YJ}F&IX!wW(UkU>B?ewf}T^Z z;0h0G;Zn|F3^cg|VT<+PZH|^ucK0i7+;!6ABGx(SkjO`3YV*PVZP0B z(KhfHEI%t{-F_RO`1efM(eENkjWWsTp>ecHR~xERF2TjwRzhQL4!E7sW%*OJAaPza zT$^Gbv{rwHWmbFWj^35f$%jC^jU}`$D1}%P2R!4YBgC5@Cbz=Xgq`c9LRm=yz1l(X z_A_^3oUA`{=D@s zxq8v!Ag7@D8y2rw>*V6JGAP(-(dzZ9<^``=9q1Iadd>WK!<^PHUcPjh9JBaPIljo> zNJrlO9be@8*~VXJ$TC$@Jfe)(f*hJyBGBqlAIRutkw`-PL@m2g>ZF-KYJzI0Y@!M+ ze$!5u)Lx>ib|;YJyU8R%EF@jV*J)OJGoQ8p6isnB!si?wNkVLl>FiG;ox8k-dc66A zN?u(fk{HIvYetB&k0OmLcul!aJLuffE*emnnP+(+k?bzNPIT6EiqS!L$oWB0G|N1l zB>jjMV-&uNA>9HEnPo*f8t#(TnQw^B@AV?TDTJsmxkBsT#0{BqY?7RILNzUA>WC^D&mn42S2*Ml7Sj8aAZr 
z-Fy1%u0Vn<=8&*OPx+#~X{0zJo1dGUMJo#)(w6lSaz5`0sa*GrT=nT7oyp@!8NTLK zFL}~u2aiy7o$K_zQ67!^u$$UX_(X#oB1lT;0ot^9J}tsz%B_8$TRgRaCatg}o4=}& z(a~i*_i;HH{r<7YO*!mo+EPRFw-t!0bHFIC7RtG(dvd)td*$Dr%il50BHpzbdArMNN!rO@H0^*MP1#>T^Q$dM zQ=30A?#)rjC_i3y=$SZwSPm(qmiaM4)w6H3Z6T+|lhQ*ZD|>9@qHk=9hX z`W2C?6pQ0}J|Jr+SkVx}KDU%x8j64qA^WwpuB()Ca zMY}$PJM2niUeo!SlsZ~&_%}__8%s47{6%zPa`_@Mm7MLfo$B=4&GYJB#JlZ2sgivm zQ@e|)hGq`6dnYB!j1Kd~lb6w{q3bAj&%*0^+)sL__YIOllxg#@pQPr@DH^b&Kc6xr zlQw?dMKmfyXu!fiBHopdGVd0mr1F{g1|ZdNyGWAC_4upTN062ygJ|v0Z`8H&5s8`f znH0V#Al_>}(cRx`NK#iMIrOxRG)vNsN9lCj_BE{$wGGd}VshzZo#yN-6q~h~=8|U4o0nQ@nY&Cq^_gO>D;S(siQ&wwVahoJT7POb&gF`I_(F2 z_xdH>sFOl-WiP3*zaGu{ypl9*xO@#lByXY`nqzAB*6_1B2IX+90PFq@>DIwUT0>q|FZSD^XEXGu`SMC#x6h}bR8 zr4cS6L}p!`8zBj$G1gK1F-)QUvm3-=SE{L!mN!Y9F@RY2&Zd0ppR_D-D4DssmP*Hz z@v5sX(b3CZ(w3j!iSOOl8uWS}I~uKDp))_WPBBH7J*AC=`92UU@|i%RKWdq$=A`irtDnq3jdv5X4ld-gKTo>`!l1E}R_R{d)E2!xckz+rScGHV=fBQ{l&i_GWack(tfIYPPRtHgvj3xnZ>`DC1 znbhAdgxGJ-pw_*HP*2l$v}@Tfy6oFMI-yP^Q;!yq(K}RV7@0?7HjEcTcfBIzG0~)= zV>OW!eb4(BzwrKFl;Yoc#=jo^_xQ!of9Ds8TmP9~h#h+|Z9)tbs#&vkqfuBq`8d%! 
zbQQ-`I}7<82Vs~=3p7r!W2QbV|nX(GKi@p9y>Cl8?$(AK=qC3qi&cuC_Yoevw-EepsBUuH>Ayr35UnsfrS;>uu(dkLnZ663B)srznwcA$ry&~93V{!`|` z&;TQ0!$M8r#V(29&KF}#vkp;1N;KMa*`Sv4EA;q%hBRC=#+vM7e9K%%GIz!{bUmAa z&mvaAn%qK=E&IV=TPR|2+8Xc<-$Z0$t9T}@pJ4o{mPpfjG0kIAVebn)VXpI03>$oe zHvLcm&R<2eY%GGv^l}J3s*Cnp8{p}7TOo6wB9q*k4s{dL!9AiCPtI=y_B<6`Pdw!&p z`P&)vG16fk>u%A8qj7ZAn0=6Pc@!i~T|j1Leu5Ugw>Wv7y#7{5Ldhk4rkm7581cgj z-REgx^@HsYTJjWIUigsCrS6#Pwg-Y&#?d>Y3t&{3AuD{tG4oNKAi3vDs@KE=M=iz3 z@vkuXLmiBN?Eg^sJB!1IeDf@NoGt3&LAOw^RrWL(^65DNIc)g<@K5eyS zhc>lBtI=av6I%%x=MJII^*Y?DQwta0enzVg2CN~(6!!V`7Ut<0vvnU2Vp_)lxc;LG zW?V8B>fN=4_F;Q*#Oglq{JDeR)8hk57b%mvzzg7>5Dt}rFJaBx4!n_KBe)A|am1wQ zu<6!I>>B(9yhrMgn*BRaa^a(Bw)+C+&-#p=jeW4&LXQP+xkGkUmNWIqwkD`>G0Qu1C>h`=6k3#v9Xz z{Y1{fKkt;foiOLTgCLonMq{Thg!xa61v~4D)SDe3dp>G0-7l7`KEOek^1)nC8n_TA zj^czqBTa}o6J-6E@hTk_<1&OO{oxOd z;SV9gMvvVM)n>K}55lCUZo<{^J)qrv6OQStA|!6p#-Nr#=-r{n-s&0($pfB2KvEZ9 zW2Oz#x$4a0f->`7@KJmdufR^Gx-;25M@%rb68>JQDfHf^B>Y?ufd^MgSnm872>sXu zYmV#yNz+g`aoCB)Yzd%caW`qcvnaZq9ms0zDc{_74?0I0GR~%uHbaG75nS~#7q2bq0;Pc`+tZ#D_utJIHM7ZNq2Mxvr|8g6Fj_z~Mz2A2<*f&vh0254tidrobjRHDPduGTu|w5*E!?gQZW;Vyk*OUV3RR z)Ev2qDm~s~#Ir(LW6(vDWDiKqYh{RVjm45YQx?3b2gL0BMk_Q=lO_jeG<;A1%jT&< z)~#CdpY@A>;~D?r7k2XY5BkNr;D1yvIIes5KkK88S6>HyD6<6PigZD3{f2j0=h_m)Y+5bDa?$u5{j($Nc1K-nXOJ{Vlt|l`^T0>+%MeJD7 z10R_UfR4Cqa(R_}tWR2Sv#78?HA-}V9SXZiQOq>z-L}oECF3POBwZCudq&X~TMHVN z(39#t`AEGLCH%|z)}Z>NO`LT18Z}LhrgPr3(b@-NX_MkaTIqj=BxSy&ai`;ndU*tq zJ&w;^{ag)3i-mCmOZ<bv;sPoQ56x4(^Fb_oXF<4E z4PMtX8j1M657t_D(rJgZu+k}$gk}EZv-|^i6_0j0+I|7e8lytHl=4a0k%6T7)MJu( zR*lwJ1&QHT$J2_ldHg|(61u=a8GR7|3I z`BE%!GQk|U%}?HMjf1D&BlnL#Al)uY$oH)}P_`(D%${eCGUdIZ*0ASv#^b+feo+^{ zBh3;sbaxZ|2^Nsx{eo`tG6bESgi0pv$df#;_L_G@16>SNAz+orU-@W*SuR?nV%Snr z_vj_bU#>}m&MDG{5bxDWpeblKBNu3Lr=GBz>>X9RQ1|7FXqiSvs0=3Wd-uj?8>W(sVbSEq(hqda?nENlcg-um{F0ZB{wLn+ zrvyVilZfS%-DGTC4@}mrARnHz(bf|KP}Xppu3V~z4Sq^;A=VVz*C=Ch=pDi(^cGhf zE1>X08AtB5h7rm~X=9`XYA`RVgyqy<|1huG=T924SsO==xJKXIXd}B-Kg#QEJscO< zLd^%gqhW0+qS;g<$dV1=k4Th3KmQCp_Ogz|ytydq9GxL%@jG~rb&*tG)e(QqEF&3< 
z56j1{-egu%Ar0PXhUfh2>AS;7BQ%zfUANxRucMt{ovJ?M9y&;$xs0S*C=r zWl{9ZAVn6b^8)<_zlL=+SJ7^U3L34|5gHOAsdYji)P-ciie_EmrC|>t;9ME+E5E1q z^*0epx4*+*Klj3c$5ukghjEyu_X@8kT!8Gdzp=lh2b({`Rp8n_@ZY1aV^Z8fk{jKE zgWcw!s&X`O-7aOP4)tNOQ=#J7zb&zpzmCN#$K#8!v6#2olx6yu2xVXNNs*d>`3l!) zRjwtIS=;af)NGkkjTJlV-H67ildxH<5Oy3ZhPX&qX3!Xd9&M*6og9zL&uI(0E-SHB z!WEQlt)u@lsn@<=+J@y?T@Uho|{9rFTIsTVLRc z7{ATPh6V1a1@Aw-`S_j-;GJ$Br0&^=zy5p-a3TqZd1XViS{`h&b`y-qUCiqLhOAlo z28KGS2tjKUfMJw0W?i%%8qXkM2_3-rqTY+CI_usyr z<&z!M+1gBPVegZBm^4L|Ot5$ZKOXcEjMo{!ilhC7x`5Rfv2qugJ=_a^-8zg#Z&X>6 z-Xjd?$R}|Ff5D0G`hsqWolx_@TAts-o%=eDiyu_A8dv(Kv(;^N#w-IZsbcwXZ zf!-;ofGsD_VVK=Pkm-El=d9Nke9m15z3U&L|A9=5Sv-_9mwiTA&v7&acfs2?HbPL> zU%0KeK8rF}z@5rYLXQ6{;%9XXa?YJ1p0y&RB*xQagWi(rb@jN#y%C2e^ks(r6%fC2 zAB|jHi4FVpnD-4e-aFomo?Ltf8Wuh#I*E(u$-QydYrLkQW%dXzHz32SrD_Q{-F*qahxKAg&@wz{Jj{KOHG8Mx0G1^)@nY# zhX?udV-G>v`G|Ti&!Cax^x2FhHMnzL57sgI94^Rx3x8JUBRAp$t#KI!bFV&uW3%2t znA2LY-8_JukGTxN-nTGtW-3->{iMxq4?&i@i68s@D?AmWQ0Gt>edXR$@D6!JH9}^9 zbIDa4UA-PRpR|OZ-kQSa6WcMyY96t<;Vj%Sl6!(n)M2ClH)wHRNMpZfv#s?zuyptd zc(hxO5gU8q_YW6AqA;6wlRR9sXC6k*e+=#ivashKbAkIBA(ojA1YuwgrY__`#ZEWe znid5WCeBcE_a&{r{sYr_p(RLi&jAGGWS6)YT zfV^IDV0#myG0M=IfrTMsz2y8iL%`O6r_h|d2RJef{C~*Dn5E~zvA~R-e0CZ9#@vOK zi|vFC-z%UvMTt$lG#r%|oq~;?yI{*54dHUJwUAYFjwBD<4?#1`S#p&z8x__|2r|`2 z+4?{DtidPoWX3VLt14yAgRF(n+}}80l$F5UQsWg*nhR4WDB=0r84$a;1H;Ceu(cW2 zQRZ(%${<*(U8oq>Lc|1*$fk3x(S2#m*Uaf0xT~r!r_vL`C@G?<#CP(*!a1rqiTADK<>r3Qt?y z8E5Sy+ATH#Yncze`|%6>4^?5XgB#m_X#nH4@6YpmRR;mO-_g2zBHoXTzw+;xZe94UbyOB>`~f5WDorZ{YT96md34ypUE!Kle< z%wn)6D?7i9`b6x6&Up{0`=D=fj(Ue|e)SGNxb|boDN-hJ?xLF0%!HcdRlMz%(=dDI z5wJMoC|LIX1eFsjz@poXMbsT7(l?hdpd=n=xk`o8&00d~(^eR`*HXxOW5$-y9Q3qI zg}T8lU?a}}WMz%Pnz9j8b&nD9YS%EwYzkZuOofhL*T8*1PnLc+77y$&6RvCNvSAh~ zZ0zTU|A+tT{qOAJztl&U&JS7qj}?q~_@C>S`0(%IXUqN6VqGk)e4k8yRA@r;a5?tv zKTcB`q2wrbu=;xnRbn?C1 z#Co#aKgwwklLqH{WxqT@#_D~c{oF3lw+EEK&bdR3)mu*MULGeEFK_YOAtlcTVJ7fW zSsyC!J@2u#kuO+h3weIl;5ns*-n$(`qx@!&#H(-V#X0r#%B+0aq0vY@(`)FKW>@)m 
z?uv(Mb`fKnDDiumB@}f#$#ro{r3!5N@-GM!q-21xq$j{}y8!U~xixK^@V)>?27k zDa-rYOBX!ee-aaFv#5TLF50}Zoyhtg&Aq>!6502jnD9XbQuH#!0C6k{f9g#?9WsZL z3l9;c6~Ac&4X3rQdJ^xx!Fl)p7D;}nzxb3ZdYYq)w6 z^}>s)HQuE?E}4PbI(KmG-zfLXy7H2(&lBHw<>H#lUF5-^&Hye|L?WF)&iZ=Ll6+6% zmi?PvHGWQ8_7;m9eJ_zyn+j>);QlywWIOrQmQNJoH2&@jNeQqhN;6c*&RMDaV@F%F@*R;UMI@KUGa&W$4X#4 z{TB9>7W9(iUsC~9?e9mvdf0*6Vk!9Eo^r;zL+NIEaBAi?T^WNYjlnh;|T%|{AJ zV#HGN_1ShhE7Kh%-EgJp8pPYqlJ-`8NNeszle0L7+(IQdZ+Mjq z=j8X;u>VjEqmCM} zWs|;;^|Q5EZK@7R3a7!Bq!+kCvlkoj&>F{;EQR&m)u>{sDVTqK3q#JD39q)xaP}7$ z=DmD2k(K#~e=ahzYk1g}B>AaN?c?OZvK*aNE*xwC`1NXD7!}R@YVDkQgAu_0Fm3ZB6iv8E&y4;Ex%$-P|MU3v&E&Z|J{>H81|G~aiHG#);tZ{UZ7Zp(wSEAkOXD0`IUDte1Yo znr{VElT#KN`rM*pR~aztUdy1m=@`B|r@-z^sRI?xTG+EfL(sR>5pL|V5v)&X!|cWC z;B#?3g2NZ^Q!Ivg8=oT6X+T*2LaEuA>h{yzV5_x*yd1&vYcN;eEK&U6#W4KKOc}2y{y>x#kRt* zS*gHj>l4}HojBpU3Nw&%QoSoJP!B4z;-n@C9e?6(+a?Uf*w?@R)+?!H1aa4 z3maP>!(E-nIQo1My?k0jkgTbrLtmN+^IUIY5)Gv9+Xe`(B@S%ynci%}D@$fDE*-C^ zC<*!dbaAfbIaE$P16q!saI52S=r>ngIIA-Y>;vi`L!M7mWg_w2pvp>|hGEo*Ec|e~ zr*NgnhPmjRL%;AtP|SKjvd0fRGR2kU?NevP`=W_=pf|?sQ=)6UZo>7Ckx=Sgh4m+P z;iB4|$bDSlm7ZwFMp`D~{vJnh?79r-`1lDV7k3e*^TEj1zd)0g7W^YwNw}r2BP=O2 zW#Mw&@H=NNd=oi=JE+Zf3V*`YUNNxb`U$YpT8=db8t9cHYQg~SG=^O>gXd@au$=g% zn6&>V&y6YO-P(;=^WRT#gGV;z^A@o2)>BL?Uig3T7s>zTF9YS<|4VyayEtgcy!jy^ ztDIKWD?_!D^xP&1|N2}nfKGV zOMXvLfKAOiX^$*7%ovkP<1O}!QrivUc;CJBcY6%=aO_Vzk4>WY-q?U=dohhmPbZgF zI>F_ze&BTP8;S7_79Gh@S~ztJU6XQ`c!ldiS@AaUQ(r~M`l%yI`_3RTGlx9y^^&{> z{XfXe<@K~`+A#WBLkF0pIB%|T}@iaG;enpQ1+FkynIV_K4j6kb03o$PMbGY zx1_3vUn-SxwBluP{Z|{5G(-~931dmIQk-}?%nlwt-b>HL z_rjR_j$+%lUeI}3m8=?ihRpRc!k8;MJf-Fku_KyHZ5~XMewT^f_Y=inO%Hl@eF(YZ zkWEHEY!>5Bt|3L${ZI@TM7e9d$iqIGaAf^mYO5xa>F15Hi629QUd*BwJ5*r9uW%|= zKPXBf%Jb|$-XPn~Yhz>oGP0>d6^h!GDL2?veDl2zMrr6{;Q9w7MX?VpC>sbZ_h*sG zw$I7^&Qg*c9!(4`yU{OK^q|L#-k^KQ6cUc;<2H-OG+~agg5SFQC)p{_mv#KLllXg2A@NH0#H>Jpc%K1v2`%A*4uG;!etOX%8afJy$7#Z;}k^xnj9n!0rb z4SB9lt_10WPhA&%>d*%aSKg*$s5X}RZlGCp_O$x(cfu|5x~aL}1yu*!6T>&|qLu^C z(;gGv(?JRSKqbE)`0fqlpDw>i8;<1hNzY=aZ>WqM>RUn-+>VinA7d#0*cOjh>cbyG 
zG!3wPMmO3#AZ6~y#T!rigJjTn^2bdhENK}?a|&JY^ePib++t6C`u!j^=kvS*4{PHt zFAh^q#*6;l@qBiEJ~h4TKqKSg$b@pFYm#MTzTEpdaKi>-DjP%&G4J9LD@eRhK?_G- zC3XdCiOlM=SQv1M{6*c-a{oi}X^IW_>NZljR_zS3&m%~{0tdeFHKXHh=;1%{m;W}t{0DvEs*?U0Uv|4BL&ua~ zm~?)h_+rK>tf=kzgDcClO~B@v3o-vw4>q>fF}%7`l}X&{ zsr!%(*tFab*Niu03%1_??To#s)%Pa&{CEUWJ@!N5j}Z_cDIrm29FrQ4#+zBztoXxW zdSzU1);?eEJ&`BWQ1%lfN<3)^ai`CtGz1-+Kj6wJLwOb=9EXo^6>4pIf?c>j^xcyV z`8lKc$-@dj;`58YJmM5y3|1DRhnM50j@x+GI0`jv%%G?-9DJv|C$;al;@Nj*f@&}BXs3X|z&qLePXSh2s64yR76@rHJK(N09 zkGpy@k2ev}$J34RkpO9W%D8%>rrg0CiV8j-ps?pfND8z9?w$b+_G%*w>c8Lx`%@@g z6F^rF)nxCo9GMk2NRX}j=Cwn+2zF{|vq|#|nA9tZ3=4e$t-1y*`zV5X|8l^#Vtk@| z7uHc-!Md&lHEO?ONAX@L476wCf0+s=Hkk=~C(E_N0cCW4g(WK#FGKItfkMhuBMkRc zXJ@Am!?G$9(sjs%1=}&A9`+9NtNWAtm2V()|4-bj=OQHTo)1RtE5GH6j4ie6qc&p$0O$l z3ar>c*x>vQC-=L8CcWi34`Va7K>aGLD^d{d9aI!Jr6Nz+z$j4W??HQ%2{Sswu}yjI z%<7S)kh4CQjAmWrQ@5fJ6R1k^69*IDEr+OAzvsB#aXaR}*Aiu0&XV`8IzoHq1PnNP z740*$*ul}R!jEWU;mSNMp|b7*I4%2y8$~4+`)!9jH1rX!PS<0v_J6>!))Mya$2EAY zts?~2OeZ=Do2ktCgV*9`3c}eLO2F-Un3v;x9|QHi!8oh^5ZSO2eS2QUtFE@tu!nR$s{bsj$S(= zMRl1HO16FQiYed6hppp@>Cc`x_0Vd1CB_Maawk|jL<@TE(#OB1YRa`&Bwsg3F{-F7u#k~J+71~mFiVs+jBQ9(? 
zPKz84lRsTsY0-Lr;_DegLx0zkFN%hcTCRmP>*7VPP#3tHZUNr4nq=BW0#BqLlm^HSYFMex^&&D{CZL>`66R(__bejmu?UdC{}u9hBu zw1eKrY@%^dt;F|Re|pIFo;>U5oJS)JFnnA;YS$*kiyp>Md)0ydOl7oQ;)WiMu2?f9 zDX+ggoqTB6De^W>1=`+eV|dD3!k1f-(eo`RcRI=Q&;kypx_T2%t~E3W88p#q0KIg{ z5i@>UU}wrfK2`M#DIQ0~)o1f5cO=QvEwX@S7M!Pr7)RPm&1u;lKN@sk8F9^SB$q7n zsl>~eX8D{FgQz>Tz43|sIJ%SmTz8myznCXZHBdt1(=PPsU`BUeenF_`K&YxvBq!H4 zlbX$U>A-Gv?Ayhm>fAK(@^^Re4w)ndhdW`heKg;i-5;kqyrW#f?A$P;bG&P|NZfCZ zm*>|f@%g)_^BLdE>Bd`^NYG!SNNJ1#7C$ZFN4z>mIR~TM{%f6}ylWSkY}HD853z)Z zmAdrUn-r>-*$X2qUCG{udSI|zjpm=#_Np2>l?G-$r2Kg+sygZk9Xnp0>!|7rh2D9z z{kAII*+UOTSPddFpWk`=HTyt$#t9m{_5(fK+YSpQ&uNVQV$pi*XsRX8=k&ND1h`G`(lIn7m?2ogBu)htdevS(mcq@@sH%s(>H&YB4?m)bwa`QYEE79)3Pid!P zfw=5^wdfMgL5z2{D1BH>CB?(3{ftbKYq^iUStdoLjhgtS-UXg4JwdYC1$xh*CrD1q zfBU_DBg8h{S2nU>RO1)N*MlKRXDY& zxA3=$MDV(1ENnHXgsmqoqP0AqwsCw5R6n(2mcym&=@@+>BD|D%Tp9`TSq}KE;5l|o zyaW%dyYR`644kzn9PP4eNtwC|5#O|9Pob}n25o4VSqQNK7VNo>8S_0qonBSkh$BK5 zLW$8#=o)+hZanJG;!C3;Uw;&O7k?FR?EHd<7PLbA$h$Q3wF&E?pd85PZUU(HwP$IQd*hu0H^A@AWqCI2 z3#@7`gUdQ~czuUEJFI;XRilb&EDMH~QYDOSHAA1U7trwPCuTWy(uk9G__NK3siYVP zf133Y;>Pu3Ut6pNQ%z+ctInX-&;wZ4RE@Pi2ZEz&9hydZ!j>c(!Qe2CwUEUX4ArUyc>#EeOuS5uwKL&?Q!8iMV{6v(-Ah1{MyP)PmU1+kSb`0IHTw$gjR zojdNuMR|kTv`2U}&`5aZI08~aqTs}1Ey3qaJ;*cEQ0CSfUr*B(Ms$0DirHOUjru~b zUxtF}a1U7aWe{%VTQPXrOtM`l#NXr3V0Xm=dFJ#ZUYBUG!vj9;Nb+^%L zu(=Sj$seXS+(G))REWK-1)r7}Gh%rMceUCGGl%|!0jpGmil(91cDxVsTy`EF7Dk}q z#Tp2>^_U!cqsYph?#Br|55ucYOM$x(NULZko=I^QLKCmRqMOsvE-YI<-!2YxMh!*l z*>>`|a-m|t^K|;8yBV8REAZL2foSyiD~#CLNXrk_L)1+R`0nm51Wx?{Z;h;lf|oi1 zH)O1r;mvxasus+m$((WRtMexLnX(!7=7M3^DLCk7H-0}kY1t%n=lx61~1#giECHUu`tBcu$uN&UAYIsEcFE|B}TrMhwr@xdS} zRJ*gpyw&(@cocM5pNAOp3nZ!V9RBcj6*f)Vi?x3mFmaOuJ{qMfWWD!-oSZ+vapqr8 zRrC-we>w`TkD}q*RRzJQrH^1)nusx9CjS5PmlglaU%J!(8DA;{P0~5bgG8mJQZ6CN z%i?8E(77^MEa$4o?8%ns6Iw%7aGEgfz6$8MetiZtYkDksx2cJ=oOTf1TL%%( zCKogwaz;!aWsZ(I&2+T|qhZC9sNE?Q(z!cZ^t3a>y4#E-Xj(!{z%_oj(JgANc9^WW z`kHDxXH#Fxb^OLfx){Cj8T}OMgbGu?5=|RJh?u#bPd{Y<5eMAyWNjMZqhe{+NOSb` 
zZ6UJ_ZP5Gmy1aF$0Uwg2NJA9hfr0`^L*zN;_D^}io1NvHYQ5Mz<_kU9e2gSA1i@$d#~D)Hi%69h@t{Lwn?N6kUFcAz>eB>iX~G&XZRp zUC9wP^g2WTx>rS4>NL`sk?F+aw>@dz_k&d5&L+3MSz?mYBQfn^IW<-HCink-LVp;W zV((2-n6j=13?6@<#AYod`L!N-(G#B#h0ABDds$za8g+&mtu}{S{-)Rv8zt(be54!v z`a<~)RV;2@NSrRdCf7A<$)P#!I4{TqmQ`Dj?98Vmv+5?TpL&57Keonak&B3}Z#{Xa zpGLWDCb?U$C6G;Vujp|fYtYc#NA*{lIumkc zr6qU=^~Kq0T4a5bGnTbfifa9)S>Up}NLD?9Hsb=w5m{%Z=A z%BG9GkLg43rLkg+&QJdOjFZHB=35#+Xmj@cK*<}Ger|SeCRQq9H^)iz3rCZGZ`-YeCs#l^@Og_z8Xv_y^b&Hag?|H-b z=;O*rTj1tu5s#6ZsPu~~!5}GiCL9#)dR^c>yMB;mvEh92pM5a*0wacEA3SCyqe;m# zXx6WBq@nKDKl(@iPyX_s@#R19m)Y6>j4u(fXQ(7?4rZ+$L!{%MVAu327<)SqJw|d^ zUFyaZoG+knk3Q_Fl9ABI+Eq|!?MAjG6%8%488=SAQLj~5fX6N3UZ{doOnM7Ro_hG# ze0QepYa)bCUxh1Ac45=3ji9vuPiXyIBcFFa8P*+8WBldCFy{6%tdjd1t_!}Rr#$yY zE*i6lB{y&@OM-j%`m^H)rNT0EeKy#=2;`Y2h$pYeR{zKFtK<7YC>BGE;;<5#Tpzo^bM=ul zajymRYml;%7lUAQ%qgk>NsyGRh|6yDVh=Bv3#YSpqTlY5a5vE?*plib6vca^8m3$aMyV*_H79%VkX4XM}1y`UxRk?wI8Mg_p?k zNr2-;a=auCZ37(GY@;Kvb^ck{us0f4_fE!dbIjPakxoL*vIx){^&TwzHHERK7vj8Q zsc13TiJj2P#|`l^yfZux52YuAsizvA?|6cq!z-X|rIB#{^G;Z_+nSXQOas00lMpvY zo|Cz6L$?Rouw&^AUX=9^ZoSnNnm^~ljCcjXQ_+nj?CmXhUse|L!z+1J?@m$&%52T! 
zBG6oHDy*`Nf`-yl3}P#)7@xN22Pd&X0N(gZZvG zF!Rr4B>k;euf6^8=*9yOyniR`IT?>7uJfVt=29%acNOMmy@r>(8lMvpfITOjpkjzM zyXm?)TTrjcMtWM{&Mb8{&#Dp)eq^F!i6Z}Z|Ng(FzpVS`{)Nl==lZhoX)sA1YDj!b zoQUXtf5v{?0nR`?oyZ3e;wkfkYL7C2Q9XGiZs;_UwSEIVc-0+6rfzh~+;hZ3qKsvZ z-?%B0-VqJ=G0dUFQ#7sbLDHd?OLthil3?@MG_-##$@-0)Q&J~=uznTwvF!_EqV6y| zv>mAZ?T7SThTz?P(3#)hZ%4BRtl*elL)1}Jz^us+xzCerAWO>tr*9TK_M1G&!hY_k zcqx&5h@kYhg0SK$1M8EcnCt1EKwC!#lnNN<8$p!ZCr&a_m ziD!uMk$n0(>H|GsluuVZ+(cu7|Kh&Yy9wTbv&3MpE}6H|4t`AEMo>@L_*qJA4zpnEaE9nDyM5Lk8eTACX}Ev1Qt8>y(qG%r3!la?L#XB>Yoq7&!TllYTm+>NAosyO%| zow4>4310SvtA6*37?m6#kMCKcwD%#W*HKCL8M)*0jMG%)lJ;*@6blf)WfLrXXsfuc?^7K zM7q9~a*`@JdPdlJt-9k!f<-U69j;TT#6X|Bx#KrYw~P^31vT9HjWV$ImOE7bWkB>U z=aaVSMP!)Tb5gZQ1gZ;Sqk}#sg>5wS zv4jNLe_^&=kiqY~D=MVyp<8yjW3cHyMhE*jw6ZDR7nNUv~40)x1 zLD$T2;;YAGZs9!g#>Wx6`e$c zVa1X8uY?TIh%sdH1pWWqPyef~{7dKkKlU$wuP=Y=UrY-A*b~J&Q9-!U)wDB&go6ywqD5QC0Oxc9~}8{?KZrd<3t$tJsEb? z0^YXjM!nb`Xz$%chs89Z#?)FeSxc5(@3$A!tXeQ`umV3OSrKmyvtffr*W&E61z_$k z&$=D2qh%h;>EsJ$Y{^Sces)r2&Niu_}D@6!J)I)3sU)(z`tB&q+ilJ^?X5;h*O7v;j0$f~C786U6)ukH&+a%l zA43au_z?Xbx-eCpopQGtw@($bA3C+z8Lx67D?NZdNO0vx&URq^PfR6S8tr&aP8%Do zZvcC>0JlcivipPX;PVtUHZ|B3eRt_%e#ko*q;wsZFI4CMtnp;?4<5l8W4nRV)x#$B zJLvZN6%Ei01sWwVqviyXOmAQGE!W|9Zxt9kX^oiNp#}C{;qGOWR&4=w z6~b)1FO07DorQ4=ig3sVQ(iuN54KNwgXxkg(%`n4E*sf^8xOidpDWJ1&h7yCIMa*I z(ru$-BKz&U{5^6Ef;|+gkb}_9#6Krvq@XNg701*AkQCRP}w&y2yUdcyAI>f z@qO9oerwU-xifF+lL9VEcVWT;PuBOW9}!=u$8o`?yy*AS0=4E!&=4D9ht(9+X*&j< z&PMFe=lK{tXb);U8GwT~`N3!>ZPtJNW877m0eyG(=ZBthW{*8{V)Gl-`8T7YaN_fC z|5;!DRagF1XGFs5fAiP5{h#?sJNSRj2Zb9xBqH*WYkv2Q$-P=lay5#H%gIRM*n6J7 z$S)-R^HS(W=f9|*Q8@`;_JK$SW^)p;Zb96Z^`vCnMk?+1+DBtG$8=$TV(?3y9DR9& zM%%Pe`&Cv%+PR*p#m3RaTGm9A@SRg#_mDUmToqUrZ@4u#zLDg{W}k)^ww$C^cdQKwJFgsHzx_P^{VzGJ_WF)EX&2(=4TnS3IW zCV3~W^CDO4nn>JKHxl_)p4{AYg=C30P>FpMjZd|slF^$;cf(aGnz)>d3#p=R+QxK5 zo0R&?8xhfu1Y+R0hl&~paKT%K3wwhA;+uGy__+)cdfiwmDVRVLj;9b`FKL0ebsJ}2 zaEO%6{EM;t+)e|RiMiklUrCpi1vQweOS*3f%$+T_Iq~_+1t$c*v-r(WPI~%Vfp7ox 
zf-Lfu3b|#{X_7~ipZn9G3qAB|;Q^{qVaj-A`jNQ?H8hT$Ne^#spxw>iNuA6zBARlN zQL{}X=^wI)m#;r{oPL3{s|kI==w#Y3b2kZeT}|^hKP5er<4M5YNV>yb$Ri!)s5P5J z9;y5!b2lUPn$d@L&HY2iH2)&f*u(-8=M3WA?-2D{P(zm%T_G<5GU?%r1S&Fj_a0%H zNPUfN`%D|8Km+E7(D+R&NJr0pTD_;4s;M2Jk*9M=jr#>Atn4j~9GAjnCWaFKL36k% z%Otcx*b$|_Z6&JeK1BSsg0`2MGs%D5WM<2~A`jAk3wxb(rrp|>Ht+0a9^8r(>PI2t z6+E0S?sO&9^W$myfimvJg~zn+*)V$WiiBo1oTHKJy#$ZV4r+V9hz?6TN`hkIY2fv} z%sG<`B0k#2%@}xxuG)D>@Hq#N+z40lDB%e)`I18IO}}yRuc8^3^e;q1_ZaaVa-N2O z52F!oMHyogDt%{8t$)kWDb_!z^rDdY^G+er8xo21%F+Vmz#~+C=y5WC%6F0+Y{vu+ za;GtQz9c-_QecJEQAMY23K##-^-9O-Vw(usmVARVd1`nXg)fU{gBL~Q?#0=_n zp@XQ-zen^uQ;2GZA&I`PP1An{5Vw6Z8DI7JOm4w4L05ZE0vE(Ebx&;wu5YFWPkU*@ z@^{3HUZNYfN=Vb%GE#RXnW(HhM*Naike=n;h}PQE zx}04kcgZ)V4#&}u*wf6TCUp{b)0Wn}yhH;dx|qeMcTpGDLYg{jG4V|~$kn_&K;^g2 zrCG%WbnuG|PSoU?CzUSnk$md)jx4&#WgV5L!pE<`N4m>|e@v$h1x=jz!dOz2@sUo1 zJ0vxH5!sQeOvUf+)8_fj%!AXvh;Mrp3ICHy9xaNYYsBR=a8xF9u<#_Q8(m4cQdyeZ z^_-TVHk~1pOP4xRI(KgcDXV%;?d3ZKMqn$aJg0=2_A8!b#Ke=E20~q2F1&ZXxB93} z`AwOYMly{XOZ?V!lFfHs)A)*~T=R){%*_L>!d2*nHR9nXo`A`voyf3tQT_Pj- z^nx=8NZ^t^4;M(oJ{EKcd8V6lR+6xhMWmynAMG0Ti-^5n`t(UM8^OwGFUWXu9MjI8>EG=ge3?it+{VFF3d&_iIUL}$= zo(T;Npo&JvXlm+g2X(#T} zQHjTi_sP#xEht{d&W92IA~~)u;5$ik$)f3Bx~NlTHZ{0*n|r#hmL3)6>Ke6b;=i(i zK1ip8yL*Tl3^k%-N`6q2XG=(l_z`EDpF}FnwS}y0E|q9sC4mzHxVmpo1=g0T;B)K4 zh*!1JxTE$|EH{}f)?7%vepnK5`(Dyie2N-fiTcm|vG0F06vFFYea3%$|L^q||2@Fe z-TUYMB1CmOe2O@X0rwk8OsE9&SN34!3NZ${%%Vb1OGdvrfDK7WIOFyeNS}HLwD!D# zdv&(F)zXvr|dTjTyOEB-EEx&p3CDb@(hvqH9ym0e1 zDs{zUk<9{3HcVv##4|y0%3WyF$wrIAs%+wHM_3?up|9P!iL0c2cx#`*I81vr%1qdc zvPtT^TMv*bUn@4?Pz!xM+KjgpOF*%@9@+=ZrU}Wqyw3E=7%%X?G?w^dQ-&Fc1N&pn z&RV!W*@f*n*^XWJdr9BdZftnK0eCsdnQbUKz>INvOuhZ1G5J;`9WqLrtzZi9*XRON zI%~{2?H>=hWBc$qeO_Zjt1hSvOUF9dKlnBM5sJ7&J}T>;!O6u2y!#$U)><+Q=YL56 z>C0IpK5iN$r^pgD%z@pOPq0DoL0=GCu_nPI(5G0PEt@ri?D03^&2nO)sP8`X)0gAp zGuz4Knnv{3wIsd|f0Ae)K|LPl1k5}ZlpZ;-yTWV0{kbEnq@~PyQY-%18)vq9(|gz> z{{uxQGBtO*1}VR4NJCsl@LH2axYG88td9$mFrc{H{|Cpt@-RNIzV|s9Bb5Wm6Qq 
zJlBt9+*r&o>c$a$1W)Ptdj*GVoY@C)a=g^}i7;O)^U6(f@btzXkO(wcm_%SuR&T@>jFbJ2%-v_`$LPXNflYkq zpdK5NlSK<2yRZZ2+4Cnps`6&vqS0WE73})l3H{%`7Iy9daBkjj%)Go3!xgPDCQ_C+ zNIyy1SI?w_n;iM%NH1Qt>oH)A4KJDhiq6J^Fm1*KydI>+&rMna!9Bmps)KQWPQM{% zD1`~RE^KnxW$e-%0atys`RuG5wEW@8KS?|aVd;Nh7N^3uJnaRoXVs7(cNLT>`m&O| zO@g;{AgJ8C0^*i0Tx0bm@Hm?d0~~C5qeZH`bc-G@i4s1K_S+)y2*bgL$Km0ve^8BH zhPLIoQ26Bov@a52$kXd|T3!c!h)cjzzhzlXVBk&nFYpxaVTmBZGt**z#ZT%r}p(S>!HevGeyX1G88ymX+0;>61@?n)b zu+8QQ#0?vQryj`gt7`>a`>+&~Lp`YJ`TH0d`IYqWeV|_Y8$?@V@$d{U{#|H1BoEue z91NzA+c%30Tl^eSdp;u=^5Aqw1#>;lkavr7LBeeSDYu_)J=UL(QZwbJDqHjSM&{t= zc~Y2Y7>!cxr^ z!2Y>@-kMbiR$$*;Jc;+aZ1~#wj%>z*)8JkI6u7(nAj#ttCcQWUog1rh(W6)t)oU{` ze`GQ1P88iC?|^qFJ;Gn}?D!X-m!rMA2mX9v!`qw|_(T#74BVY6%zK_FE=yx(79|`c9X}L5J+fmLI%>0pV>jZR zCw=*gUo7}O@=EOcc2!=r%L{z2%d*^aFSI`J2{Y#n!q6c4nEFa6_X2FOfD?s(C3x7BA817o2%&V!FV^_>*R5$HJ|Kvq@ z_f@3e*Len$zNxe4mM#?D-x~{DRRu3`Vk`u_SqrD4Jb0t`@sKxNhLt^f6WVUxf%F1h zyt#A~MhNeF*?Alc&C16y-zMYu@$$TXe{+=Y?GJiCZ^D>~dQd22i(mgK$DVQaIQxPX zZ(Vl+`%Uy@?{2VQMPHWrbcN3b-BoJ5U8OspFuoF`bz49-;jyrv+6*NR!$H!B1#&}; z__T#Zu=0opzfJxJWbG~>=8;ycwBA(Csz1U9=pzk;ObVei* z-TIavv8I)d%qpOT`(IJB^a@fw%1zJ{&6uMXgQ)D217z`FE7ExWF>w+6mbpR3^xU!+ z#B`B9N*`@0*tX^uDIH(|EtMN+Yo`?{-)jZ?j=ABx_TQxQay@zHltz5BvN@f!^K|3S zjl^KxBbu%>iHu&N2!Rmmld~h0h_bcmzDG%vb5N%FL0LpHe>wRaRz)3`d=fZ=dk832 z5d{lt3_mr5=9&zkUm0IdgHe6pCx)jEQ~W=B$eA%+;e zOrYDY9VO=k4|~Ul>om)M16et%huRA)nWFytX^3Sk9r17-DUt{o)rfao@uVj59^cW# zNPA53Y#_Q!8U3`$9b8-v2u!HQ%!=C0w4vRI89u*1R)44_dLyq<71IvVT)LX}5o-V9 zjadRW_z_b)@G$u!c88J6jbPt3YbbLaPA@P?)OYn)X2M@aklZ+r$v9(;VlRF2X}cA4 z4_QaI&Amdzu3xzs-_MW-s;#7JwJmAwH;KfQ+~Eex`BLlSLrKQ*mGqgw5I>V_juM?e zT#5QPk|aFir&ee|h_L?{cftlM3N>Jokqzt!ilVnanSi))Ux7qfi)o9nMd^x6##UNS zU(~LlHMcZr*3WY8L2C+`uiZl$o^Ix*rSB&5t6!49b=sW!7{NF3B-p!L{t-3Smjhc{ z2~koqgD!qDIX*%OG%FM!O#Qbo-^Lbnsa&D0X|ZI?T5W2p6GszHpCIx>&FK33X(U$Z z0BzCLLEp;BBv8GS^B>gCwKlkryfyNW>-U>J>vn*W!((Z>ys5x@T2CggJw~k?4KQYv zB3%&a01DGJU{Hz$7|ZBk%2Erg^wTFFWv^1lyP+P`~IU`Rm>t>YE+N>=&2&u$HQ~vhqfvl?(2w~E}KH-d}Cab 
z_MV)$b(R`hD?qd82y)#ACEBWNq?&1*8yDtv!GR^&4>T>5<}v~ zVra`L8h%s{tIbPbwyO%?Jfj3TTN~Uu!iF6ed=T1OHq!?mUc)*2M{sL)9VVD|K+R); z*;*i>>YIg|2^|7gQEm?ttYpcP%?X0P{vK*}^kJXazJe2WnNaUg2%)X*q;JzPJbL3Z zyfrswJ$?K01DCu9Cp~+9pzRBQO*i0%sRG+x?1FT&8NaMfiDl-Ppv&+aZ0XsI8bjpx zjAsp?nJn~dmZ~7%HvkN(yrJ~04u4edF1%l_z*|agqI<3z>l|B)`!_|v%}d2Newsbo zyzLQ5f9A^fX%?}&ZCzMjY^C7Y2w`_mVjyO4Wg}{6)|Cu;VZJBdaCjUQ-QCUI`qm)i za<7vUI=|rMmVLObJ`}?*8}mi|^Dtr1FA#0D=6{rUv5{7zVU6Y!82ZqQy_wO2b;~E= ziPBe~;k}bg9@dZTw@!xlh&hJuuBj-3d-)&oO<1+&F0@zD;~(Y;`Q`Oj zh)&g1ur<8~_R~}`c=8Su7px%;tF75-o2_B$Kt+}P5zD}DZT{7&Kg~sf`#o0n8TVTc5sbgKG3v{MDg)B!y z^1al7&z^J@+wHpu(_sN0Rtq`qx1&LQv@%<@*_!w7REH;tHK;c-595YwV$)6;3@ugS zCBwh@w0BgH-EB5}euO1&IdT>*6#lL=o;`pX^=sta#a8GqGG&8Du&9>S1{sadA9)HReLLmywJOdr`bT2EHxP zVF%it7d)_G_`uqj51QB_{6Ejp%DNxhaLJe&t!%-~TReH|TOK$s)tq%&W{SnLPk~xi zF24S97Y_)Tp)u3<;1HFQC~a^h_n%!sT4}^H_XKWKKv{m4I#2KKaOG83u7_((U;gHF zHHbO4nY2&pA$l_Xc)#5in7sHePBe3=&!fT-pqr)757Ie-moErAB&z~2+HS?$Z@0j6 zm35FMCkr3c1paG~1G(E!i1BuhvEfT4342%$KZ>*P_&8g(d5bN4yKo#Qwq}C+ZB0JA zJ{!1=lVJMD+eqH%v-&60SkEvw_L`+Lzig@nJ4NmaB(s;8Zhfk{e3u#yuJG=xo3$YgkT^iU+kN=tMw`bf@eKzD!c)&mh->!;GG2$(U!lu4vD+X(!5`BWeH_VX1^nySZ}96kc8@hr;6&434cHF%AOGI$j9c*nmU z!{g3={NWJ^FmcOmQ29}cQ^HT9yxeBUa%;ghl|vwjraob_??axPA=^@V144H6l9I!D z`1JcFs0tIa{Vv(EZ@#*+eTu_TH2)TwC6){CMFlq5s)^B9{|qg&B7nVp6m69s!oMl0tXp72pyvMWi zU?$#zF;Df0_~T|_Zn^=5^74G^mWfzW+eE*qxUszsvi$c_MRv+k5nt?wIPmHj*!o17 zkGFTi&FhlDxyPMd)07L5ONCzg@o}&W-Hf6IhYG~bcD(4_ab_JRpcT6E^3wnVXUwKU z&#Uqy-m2ghHxqWGZ!s#DEkUtnI9)tJozE^5*tPxE(XNOxvgoM=A37(8sM!3(6E4YM zZ#N%Icu(}4Z^wqWS@4BPJkC|sh2D$jP%SbVEc^Tg9sPX3;v-Z$G z5*gM{^FGcV&<6XJ?D%5x1RpHW;NNRe3iH3@nA6SdE&*+-*p^j3_S~u@_$hMgDt;xr~!K= zPT);A)zVFSo}y%V4fA z2Mha6zmcY}WuXJx>^%;ef6RfVg`*(hQX=?UEB>cG`mesi_rLm#|M>Dh=%fE1zR7>z zuj#4#=lP{8G?nRE5l=5vIfAE*Hiiy8%K883L!{QSjQ{*FBDdQZ41OIZWf#73T=6za zQ=GtF_c}MQK8f5|=+FM<8vSTMWr1?dURpd%}-K;`It5;JfNryuMB>6UvK(dGF*ei^nT zrmd55ea*1vT@W!_(L)B5$P1j0O41x8ru@47RDAFd_kOf2?2!FR8GR8AGLZX zI+-NDeaEL=tk`m8lxk7bK!opWF$vi9y0|M)1#5w{8_$JJ$oG@rt;5 
z)DtTGW?3-U(t<9Yx1Id8FP=_ucuqZsmr+j}7eQmIW%f?jfk|UcKuu>MjhF9a_;&{I zz0nQ-$o-~Mf~ttiu)ZjfJ?wLOt0Kq`K1Ad5W4O(|v2IuGCxQKA5^ZU65k^v@YGo@t)`H^ z`~8_twonA@ql8b^6_T`-!Y!7TaMJd?f{0!QktsztXx1(}y!+hcPd zd$MSm97QG=%R{zX4e?#0T96%foMbOtNynHUCjL7z8RcPj7-^LzC%G5QM7=bD!@~0r z*n5wPhRkIwZ+8$;iTaSGH)m3fVFSr1vr7bfJILr58>kdB)Ml#%#6P>vDc?xrQV(m> z5ejysEU$!~_A*BUl_f+i;sVieT}~$rc7?&gW@O$Tdx$@BiF@1U1U2X&G;f$INNutU zCJxudu+X=(q%?vHm|}(<1N!2^Cw(!uDS(#EKStgSen~_VFXc;qJoX-WSOc!*UZMe> zrnKyhA1xX`iSF*QgQBf{Xmm>{-80w#Bb_^FZeta<^1cHOOFT;adoNP6{F`*_I$2@H z{+TaXVn$w8C_&+@AH;N!J^1#N`z%iIqJeY5n7+Ftv{1Q=m@hg*b-r5R8Ic_5NhDO` zpffdbb|upc-01Y)YX z8b{xe!pL8k`dbek^iIQgaX67yXLFN_OMCXYHt{J}scNGc@b^m(YzVZ#4WGw{I z=}iUi*4zY{SJv$0iHf}3ep|L-Tm%@+uY}N45ifh;Ehw7&2B}p5Gd;_ccQQ6XCrJ=2 zEbq(vCTVcG<8Gt^*wc+_ zrS%{_zY*Hxn?XEbIN7f6#fCen%j!6KVa#9K;mi3Cs3Q2PRgzM`@u@z4^t+%N7vHB*!i|k- zLv;C%IU+W3&n}d#G=X;)A7bZaXMTjf7|!okV8uU%lZ$6EaC1%)_!Z=(AyCT>L`!mL=Uxw9O>_=+-AjjbqX?)UoeD#R*)MJ59Z1*{ zicNDCLh-5=5E%_(*390G0YwP}Bo|>yZUiV_oCtZ>1ZMuKZCs$pgnuCXo|}U@sp|Gu zxX;p*uTL&Pcz*}PpHfIj>_Hk8Xu*z4UX0^@M8JwqX8hbvLmXYD!)o-`=Hn~V$dELF zaiAt4U0VZi^)d}!LFqIOENz5W*UNC*!d9FWE&QyHJ3z7~wqWaxTiE=gmcIM;6gF#g z!J6IQaP3uFKIWnvq{VpgLCd=#=WH+3INT$N{zFmP@sgHI^a0D=(=n!{mjs4d;Sk=1 z{kQz@ZZ7}7{3Pz}Ki8MI_d3+z&tzgWripw@wZo)MS4s4x7P=~ECz%xMjO%@ZXm@ry zO>s$~XErE7?ACL1z1uaSxJ(9JYx1f8=YHIY&KD%6cmvrzp&tbPoJ^%VQwyrYZBa|B zo!mHhpAN6lhJy{-C~PFj_GR`k%CwyvUTc8jO2I?D;|1O8+7ITh<%z7|UkZ&D(?fDP zAXU{Y$gxw!q|w3jezGGPZs1 zktfkfWc+JYSf^%ynUNz&dxJc;RN+2N*w9OTzwG4JCKl82L+;VMQ=h5nK{fOk8c%Fc z8=5Z3Q|AzYuMlTMK{1DPj5MS_6`JXmK5;bA<&@9ZNNdPcu%`a!#&M$TYk8|SEFl&X zFVIJ`T=BZ~KB~Cy1X-cpP5lftsQ2vmr00$z4N!l{To!IlCMwxt*x3U5^p-yQw$(7w z1fClB-)27DY@|W$9=J9zmX5YnhgDma6Ul3TpFi`g;NoC2a5c5So4g{P2vfm;>vw4G zx-V1|IF$M`f}18D(-ZOv z%TIU@v$I3*3Rw(M{YA6JWOHYt&QQ_zS@{+kV$6{FKvvK14<428g+53Y?)56exhx9| zne>%TTWCYlFSKxOjzS(^t$`eLI!-6c8iPlwGp3jq(d3Vd7|HSzK3Ouem_@e?Va!KY zobmh?`RS?$SIa4B_>@*qXkd)3yFBRr39m@|HYK#WZV6Stm0`KS(3D0PV{h3es#o)Y zSd|&Tm@L5uHOGyPu2F&VwWn#iaFc3xts$H{o=Ib9Bje_7hPiX{xUnrtV8)da!>kL` 
znpDv`8F!N^WL+YskwE5wFj%!ks2zi0k*g@wTO ze^y0X%gpFfy{{z7<_Vpco=n2c+G+3YCqz^pM+!8{DdS>*8a)nNd29u32vxwB2Q0B^ zd6b~j_rsPyM@Y@(A@rz55p|DqhBf`(68oGBoZLe%f$4XTF}N!y{O$T-d*?6imyaBb zipnA7VQEy=Lm$75@1$$UMbhLT+|imio7?4do*XvsCH(|VJge$28ePDUl2aGx+(n&K z`*0Z5cu+-8&$7l9R$l0}rHyWk^CXeR_1u%WrZ~=3LM11&nY8&;bjFA;B<89<_u1(d z6&XDDu~hp;vhL2NjSpIB?UVv)vhe_Q*?Efuh8oB%B`UPBMJlFPJt4) z_w-3zC3wgD#^A+$p}``KR)t!#qu(pBv$+y@*HsKlI(zo=b3<0GEC4s0lw}Lg6bc!E zWHNe>86Q(f$gn@3K|1vz^OdZ{g^Lt zV!4Ll5S<+jhnpH9{<8>tH+w)@c@v1ZTyFP0S9avJqo@>IhjXquup!GmNzrV7_&Tl& zGrZd%rdr_s-__+c-k9=g!EeF0?^Z5ZxWk2BF_5hEo4d7L@DD6n#XKEb2NUBs=$@Mc zAAjnyAHw(HIrXdXqu*=1C3a#zD;Tk5?{-6zo(M!c4{%RDeMW}_FFr)b@z^(P0clwV zF-WPPv+PaSzz!uWSrtvS!mfcRdG`><1`hl`t;Xd}-*9$K5)QP!hfbizhe8qY9&sIV zwzWdGUpv~IF2e+Q4gPnQGh3nI$@&{JI7;CLvvR@a33((pXfLei3Zc8< zEzFZxvPuIr`QZ3iz@`saA(dx+1Lnf)lAY*1^anDE8Tg}o50;y~M_-c7l_gtY^|2y2 z)L+<1XYM8rj|5&==)MBS;3{m0m_u==4PP?%7B$$HiPehN;nNc#vl^SqSe2{s22l<; zqwXn4H%C%QOP$ZflVUzbcPMmdKEu$5cZuORMRsw!BJN$P$B!CS13S6~peS;rxANlO zG~ieuG`l^yvm0Hkk~jf z)q>8HUZu$AsCPS{gU{=hUXwkB6Iu95v$^sy&q1AQ^3j8sfD!sMC*WatV{ob%HeWIHPbT_TA( z=xL$#dt;&)^p+%d>}1@Wr&1>`;r8lD8ZTmAh?pOhO_%*JI3?Qm#GGH&U zE^$T4w0iGto#%*uLL%+0+(hlR*`Yj z3oIoQRqv7Y>$PyrlnU}rnnt_)`cR9hW+)Dw!)Ur5p{IB1!_}jIXy7*|Zk=#HCivE8 zZbG{{_+N3Q%I-&*`Z8lQNzg(~M-zzF52K-y1g6vGIjKH!lLUIaV9wadz~IMrL@uf? 
zG6BwX<#P+5iGA>a=^s)#|10f0(+6*#GK9JPI_Q*}sbq$(80{q~RFrYVr`cOfave@E zqS3N+zIGXL9VLh1>$Z|qT~m_QSW0Y0dV%5V{xCSvjEc{*Os{PzNecTx+M|XtD$;IZ zxmp87LAQqdS>8eoM%Oa_!=5lxYKzEk$9VeGQc4n!?I04H&)y$a$V1n|I7TC38F}{p z8SU&jMJyd%F!6jREnJTz|3odBCiWujii8Gd++*HYTf%|o2k5q-QW7~cjd^GDjZ9>F z$d(&t>8ID_)I3XIIJ_K77G}tTL@uJh{IUgH`mKnk7-CtVk@zWQ5f+`8X_j&+}y)kcAReKl(SPQ%e_oOqcC zgj0XH@Pn_^lHA32C^KLp-UdXsIk9j`BO7-$RY2B|Aw+t34|SRM3|o48F>;VOo_wLg zzR9*`d!+v8w?-L5y$*ocC?kHxrVmgtxIeG4$%^`(8$p^fuA;3&73k%^fu{pM!m)8j zF@5ee^6{=B8@PEAl>HWN4#nr?ON-N)48<(C#P(r-Yt&qMQm1L+H?FnPzj*&4_zmx9$HBh-)4Q@TZjnn-1!~F;`KT#$feHRX3 z%w`>jAyYZ@pV~`PLLK;kOK(VfTn7!w^u+2%zaW2Q1GHZ&qh}ip*o5|bu+2x44;}r5 zFi*E&{KW_;F;L-Gmp{SupXzu(&z#N8c}3L#n!VR3KGk-wd*#7+Kt2X?anmusqTqgR|+p``k_Ta8r zntV{oPYhl37n%sUxMpK_2y`Ehk4_2RLoJqxFCBxSM($YPpu>jP$xx49R(#Q-x!~@X zhAV$K^3zUA@rgz)9&a$?T^2n!U(+N#J-UI%4c1JJ@JwOzM|D z7V>gqX|TpuCRzQHPk=%sGd$=rH5fFG`j1y2_q!cos*wQ}CUHdTjv~Hry+eBY-lY3Q zv1Ie^Lh^g6IV6hSQnwaw#@1X0lUK|qR%v{g~)Yt;Eu?&e6yAsv(sYhyN#zdy)$_A3~*`w@BZ zA(?JVXrdiEjmg+2TF|U5-2Av$!qhyB;HsV&f&ImBk{lJn6xQhDyr0GxbNVwAm=NnD z_gM}672>!!D@rxqhjRE!O1jO0sf+X}HQj%ouFyJ4#*1`7wxNnF6?S*cCytXv{)Uk2 za#rA@-=}WhI7Z##5dEQ2PX{y_qa-?=dAjj4@n7K1?eHIofz)@FIp6njTuzBb z<-O-|`>YByo*>+2xcr;sta|~{jgH_Z_+Jla9D~rMainX*W12h5K;R_YhRz--#I7jA z&8~aV?CC=cysLs5?|Nv1^&Yx(SqfelCBtTZ`i?I4e-P)qf`Fzd`bhfFp6jw|mis~XZGToH+5zwF7km&ob#U{z365FNffF}3a z$o10hB9)O`jXFZD~RDuPHMA%-)6Uh}ksVSLwt%Ox`05qujA^!}QyYmK!_FMAVNB;#yFQshT>TWLX z6QM7oHxoN*zk$$V173AkkJPLe@!A?geh2@8wss}cGC9L`JZPpJi#>#Fg*{(CWdm-- zHPp{Q4DYKf`LDD_>_PbwR#zTGRiETzz|#?8>e549SBv?ob~l!>F61@_zr`lMYtM?Q z51Ve`%^qnxOO@proa)P~ig!H+LAKG0|7d0aw5c0lxY?BL7g&k+_M7mJrUegE&>oh& zma?0YT4^WBSmXN^eBs>_0>9FZ^+`2jLq6=Gb~cl={<)5-6iBD@s>nfoq3`9hjj(%} zk{(&(%$6ng&<8Cwp#4E$NzMyd#Fp>Kz1}O}S7bn^1EZj4T?CBDj*?@q7qiKYwrp^m z34L2EWfgZ~0amp@K<;wtI3j2=l9gn{?Ifs=dGXPwM)7)WFO-H@!-(!{&?Y8;w?>Ct zNE#Fl$w=jFC!KgAWetxBI*72VRK0Z;Uc}n)rsG3&nZE`5!M$aCUfoM-YDef1PRQ1d_JA#8w+>CJwcMV zLf}>;g0IM#T|4N=YErxD-m;glUik}U&c||vb+0jL@J9$v>L+7XP1t7D^_%m{uQJU4 
z)L`CZoV+o^EMTk{Lt)Es0V5I)!^nT5??>`dU{FsjrQQj%+_o=lsnUrLOsef6z20Rc zTX>&^z$(A>(G_HZ8q`sGO3YcCPpbO5@mkyo%&kCVE*~H{^?yRWjfKcuA9=6Ip^^A@ z-^ZSYO`QBM3o&iIFUaPNAh<0rXq#P=z~m|=D;u|A!O6d2nbsFB^f}XkyG7*V=6tfl zS&h*(TX1;H5qys~;ueWL+`X*_4rTr75>IE^l{H7I4r@^dx!hvODCQ(OlINCxCz6Nm zIBX>NC#FiweKn2->_hYzkkSJO%klC)4LSbD6F52iJrUI!D*}&5xr|z_m!cbUFgV<>a=5oSJPF(GAXW`l|<}KiWvb2h&N*sw>1>I)*om4X7%E z6}UW$z#*N%-JMGywrd$i-}b?j2{~0237Egauu&O%V>$MHKdHXBS-TveRdc- zcZ=zZ_f$BYrc^j;s)Zhtg+!L#fqRDCVOMz(riMSmv$6+xIlhbN4>gcO8K+2DhYgJ9 zTf!pqR-!DZ<5q^26Pe2tYG)SUkac&^f{A>xWl0VGDn(++%;$7(B z*(RZ*xdgX~CW4+{Rg?5sTTuDuDl7zsW9#NV5_9rCKKAP+N@G9Lztf2hWrkv*_e~BC z)f1~QU-I;{1%0y63>{=T^vXVuP&G#;!v-*}-wKmv=ZS}t3$^!?KyX4Vs;ipVa zd>|78?WTx+otV~u9o|&`OkDH6A@E6Hy%}IfeSF7RnMhiK%%g?#bJZ_Su k;hAKoe<>~ Date: Thu, 12 Sep 2024 17:44:24 -0700 Subject: [PATCH 48/84] Add API test and fix a bug --- onnxruntime/core/session/onnxruntime_c_api.cc | 2 +- onnxruntime/test/shared_lib/test_inference.cc | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 42c342356af88..a3dce94b33324 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -819,7 +819,7 @@ namespace { // Checks if there are active lora adapters and adjusts input spans. 
void CheckAndAdjustForLora(const OrtRunOptions* run_options, InlinedVector& input_names_with_lora, - InlinedVector input_with_lora, + InlinedVector& input_with_lora, gsl::span& input_names, gsl::span& inputs) { if (!run_options->active_adapters_.empty()) { diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 7a33bf8a527cd..2217b95161425 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4402,6 +4402,53 @@ TEST(CApiTest, RunAsyncFail) { EXPECT_THROW(session.RunAsync(run_options, input_names, input_tensors, 1, output_names, output_values, 1, CallbackFail, nullptr), std::exception); } +TEST(CApiTest, RunWithLoraAdapter) { + + constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx"); + constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); + + Ort::Env env(ORT_LOGGING_LEVEL_WARNING); + + Ort::LoraAdapter adapter(adapter_path, nullptr); + Ort::RunOptions run_options; + run_options.SetLoraAdapterActive(adapter); + + // Single input + constexpr const std::array input_shape = {4, 4}; + std::vector input_x(16); + std::fill(input_x.begin(), input_x.end(), 1.0f); + constexpr const char* input_names[] = { "input_x" }; + constexpr const char* output_names[] = {"output"}; + + auto cpu_meminfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); + + auto input_x_val = Ort::Value::CreateTensor( + cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size()); + + Ort::Value inputs[] = {std::move(input_x_val)}; + + Ort::SessionOptions default_session_options; + + constexpr const std::array expected_output = { + 154.f, 176.f, 198.f, 220.f, + 154.f, 176.f, 198.f, 220.f, + 154.f, 176.f, 198.f, 220.f, + 154.f, 176.f, 198.f, 220.f}; + + Ort::Session session(env, model_path, default_session_options); + + auto outputs = session.Run(run_options, input_names, 
inputs, std::size(input_names), output_names, std::size(output_names)); + ASSERT_EQ(1U, outputs.size()); + + auto tensor_type_shape = outputs[0].GetTensorTypeAndShapeInfo(); + const auto elements = tensor_type_shape.GetElementCount(); + ASSERT_EQ(expected_output.size(), elements); + const float* data = outputs[0].GetTensorData(); + for (size_t i = 0; i < elements; ++i) { + EXPECT_NEAR(expected_output[i], data[i], 0.06); + } +} + struct MockGQA : public OrtCustomOp { MockGQA() { OrtCustomOp::GetMayInplace = [](int** input_index, int** output_index) { From 6024acc85e135e79ee9feff23d5153164968afbd Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 12 Sep 2024 17:46:40 -0700 Subject: [PATCH 49/84] Swithch to memory map for CreateLoraAdapter --- onnxruntime/lora/lora_adapters.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index 6c8d2630f0d1b..e400491769b01 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -74,7 +74,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_fi API_IMPL_BEGIN auto lora_adapter = std::make_unique(); // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) - lora_adapter->Load(adapter_file_path); + lora_adapter->MemoryMap(adapter_file_path); *adapter = reinterpret_cast(lora_adapter.release()); return nullptr; API_IMPL_END From c4c916b3bdf0c1e20352609893d3f396f254de92 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 10:29:06 -0700 Subject: [PATCH 50/84] Remove redandunt import --- onnxruntime/test/testdata/lora/two_params_lora_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/testdata/lora/two_params_lora_model.py b/onnxruntime/test/testdata/lora/two_params_lora_model.py index 9ca2e547c1749..ab88e52434b03 100644 --- a/onnxruntime/test/testdata/lora/two_params_lora_model.py +++ 
b/onnxruntime/test/testdata/lora/two_params_lora_model.py @@ -1,7 +1,6 @@ import onnx import numpy as np import onnxruntime as ort -import sys import os model_path = "C:/dev/ort_main/onnxruntime/test/testdata/lora/two_params_lora_model.onnx" From d5dedf1808c9a8ce96df82065281b31bc1274496 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 11:51:04 -0700 Subject: [PATCH 51/84] Add session dep to lora, remove debug code --- cmake/onnxruntime_session.cmake | 1 + onnxruntime/core/session/onnxruntime_c_api.cc | 44 +++++++------------ .../python/onnxruntime_pybind_state.cc | 14 ------ 3 files changed, 18 insertions(+), 41 deletions(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index 2325ff82dedc5..bb662f8ecd68c 100644 --- a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -31,6 +31,7 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_lora onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) +target_link_libraries(onnxruntime_session PRIVATE onnxruntime_lora) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 2419723ab09bd..31482f4588896 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -817,35 +817,32 @@ ORT_API_STATUS_IMPL(OrtApis::CreateSessionFromArray, _In_ const OrtEnv* env, _In namespace { // Checks if there are active lora adapters and adjusts input spans. 
-void CheckAndAdjustForLora(const OrtRunOptions* run_options, +void CheckAndAdjustForLora(const OrtRunOptions& run_options, InlinedVector& input_names_with_lora, InlinedVector& input_with_lora, gsl::span& input_names, gsl::span& inputs) { - if (!run_options->active_adapters_.empty()) { - size_t total_lora_params = 0; - for (const lora::LoraAdapter* ad : run_options->active_adapters_) { - total_lora_params += ad->GetParamNum(); - } + size_t total_lora_params = 0; + for (const lora::LoraAdapter* ad : run_options.active_adapters_) { + total_lora_params += ad->GetParamNum(); + } - input_names_with_lora.reserve(input_names.size() + total_lora_params); - input_with_lora.reserve(inputs.size() + total_lora_params); - std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); - std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); + input_names_with_lora.reserve(input_names.size() + total_lora_params); + input_with_lora.reserve(inputs.size() + total_lora_params); + std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); + std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); - for (const lora::LoraAdapter* ad : run_options->active_adapters_) { - ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), - std::back_inserter(input_with_lora)); - } - - input_names = gsl::make_span(input_names_with_lora); - inputs = gsl::make_span(input_with_lora); + for (const lora::LoraAdapter* ad : run_options.active_adapters_) { + ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), + std::back_inserter(input_with_lora)); } + + input_names = gsl::make_span(input_names_with_lora); + inputs = gsl::make_span(input_with_lora); } } // namespace - ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, _In_reads_(input_len) const char* const* input_names, _In_reads_(input_len) const OrtValue* const* 
input, size_t input_len, @@ -860,12 +857,11 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu auto output_span = gsl::make_span(output, output_names_len); Status status; - if (run_options) { + if (run_options != nullptr && !run_options->active_adapters_.empty()) { InlinedVector input_names_with_lora; InlinedVector input_with_lora; - CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); - + CheckAndAdjustForLora(*run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); status = session->Run(*run_options, input_names_span, @@ -873,7 +869,6 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu output_name_span, output_span); } else { - const RunOptions default_run_options; status = session->Run(default_run_options, input_names_span, @@ -899,11 +894,6 @@ ORT_API_STATUS_IMPL(OrtApis::RunAsync, _Inout_ OrtSession* sess, _In_opt_ const auto output_name_span = gsl::make_span(output_names, output_names_len); auto output_span = gsl::make_span(output, output_names_len); - InlinedVector input_names_with_lora; - InlinedVector input_with_lora; - - CheckAndAdjustForLora(run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); - return ToOrtStatus(session->RunAsync(run_options, input_names_span, input_span, diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 7cf790e7631ae..8bad7680cb17b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -68,14 +68,6 @@ namespace onnxruntime { namespace onnxruntime { namespace python { -template -void print_span(std::ostream& os, gsl::span span) { - for (auto v : span) { - os << v << ' '; - } - os << std::endl; -} - namespace py = pybind11; using namespace onnxruntime; using namespace onnxruntime::logging; @@ -2026,23 +2018,19 @@ including arg name, arg 
type (contains both type and shape).)pbdoc") } feeds.reserve(total_entries); - std::cout << "Adapter inputs: " << std::endl; // Append necessary inputs for active adapters for (const auto* adapter : run_options->active_adapters_) { auto [begin, end] = adapter->GetParamIterators(); for (; begin != end; ++begin) { const auto& [name, param] = *begin; std::cout << name << ':'; - print_span(std::cout, param.GetMapped().Get().DataAsSpan()); feeds.insert(std::make_pair(name, param.GetMapped())); } } - std::cout << std::endl; } else { feeds.reserve(pyfeeds.size()); } - std::cout << "Normal inputs: " << std::endl; for (const auto& feed : pyfeeds) { // No need to process 'None's sent in by the user // to feed Optional inputs in the graph. @@ -2056,11 +2044,9 @@ including arg name, arg type (contains both type and shape).)pbdoc") } CreateGenericMLValue(px.second, GetAllocator(), feed.first, feed.second, &ml_value); ThrowIfPyErrOccured(); - std::cout << feed.first << ':'; print_span(std::cout, ml_value.Get().DataAsSpan()); feeds.insert(std::make_pair(feed.first, std::move(ml_value))); } } - std::cout << std::endl; std::vector fetches; fetches.reserve(output_names.size()); From 14ba862f167ccd963e641aa3ab7b52e1d23c6ced Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 11:53:32 -0700 Subject: [PATCH 52/84] Applied lint --- .../core/session/onnxruntime_cxx_api.h | 2 +- .../lora/lora_format/compile_schema.py | 2 + .../lora/lora_format/lora_schema.fbs.h | 171 +++++++++--------- .../python/convert_npz_to_onnx_adapter.py | 21 ++- onnxruntime/python/onnxruntime_pybind_lora.cc | 3 +- onnxruntime/test/shared_lib/test_inference.cc | 5 +- .../testdata/lora/two_params_lora_model.py | 67 +++---- 7 files changed, 137 insertions(+), 134 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 3f62828f3324e..4934ff97a857b 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h 
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -740,7 +740,7 @@ struct CustomOpDomain : detail::Base { /// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file struct LoraAdapter : detail::Base { /// \brief Wraps OrtApi::CreateLoraAdapter - /// + /// /// The function attempts to load the adapter from the specified file /// \param absolute_adapter_path The absolute path to the Lora adapter /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py index bee53885a2005..f98db367ae83b 100644 --- a/onnxruntime/lora/lora_format/compile_schema.py +++ b/onnxruntime/lora/lora_format/compile_schema.py @@ -8,6 +8,7 @@ SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): # run flatc to generate C++ code cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] @@ -49,5 +50,6 @@ def main(): if "cpp" in languages: generate_cpp(flatc, schema_path) + if __name__ == "__main__": main() diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h index a75082af811fc..097528d854bf8 100644 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ b/onnxruntime/lora/lora_format/lora_schema.fbs.h @@ -1,6 +1,5 @@ // automatically generated by the FlatBuffers compiler, do not modify - #ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ #define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ @@ -9,9 +8,9 @@ // Ensure the included flatbuffers.h is the same version as when this file was // generated, otherwise it may not be compatible. 
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && - FLATBUFFERS_VERSION_MINOR == 5 && - FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); + FLATBUFFERS_VERSION_MINOR == 5 && + FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); namespace onnxruntime { namespace lora { @@ -50,60 +49,58 @@ enum class TensorDataType : int32_t { inline const TensorDataType (&EnumValuesTensorDataType())[21] { static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ - }; + TensorDataType::UNDEFINED, + TensorDataType::FLOAT, + TensorDataType::UINT8, + TensorDataType::INT8, + TensorDataType::UINT16, + TensorDataType::INT16, + TensorDataType::INT32, + TensorDataType::INT64, + TensorDataType::STRING, + TensorDataType::BOOL, + TensorDataType::FLOAT16, + TensorDataType::DOUBLE, + TensorDataType::UINT32, + TensorDataType::UINT64, + TensorDataType::COMPLEX64, + TensorDataType::COMPLEX128, + TensorDataType::BFLOAT16, + TensorDataType::FLOAT8E4M3FN, + TensorDataType::FLOAT8E4M3FNUZ, + TensorDataType::FLOAT8E5M2, + TensorDataType::FLOAT8E5M2FNUZ}; return values; } -inline const char * const *EnumNamesTensorDataType() { - static const char * const names[22] = { - "UNDEFINED", - "FLOAT", - "UINT8", - "INT8", - "UINT16", - "INT16", - "INT32", - "INT64", - "STRING", - "BOOL", - "FLOAT16", - "DOUBLE", - "UINT32", - "UINT64", - "COMPLEX64", - "COMPLEX128", - "BFLOAT16", - "FLOAT8E4M3FN", 
- "FLOAT8E4M3FNUZ", - "FLOAT8E5M2", - "FLOAT8E5M2FNUZ", - nullptr - }; +inline const char* const* EnumNamesTensorDataType() { + static const char* const names[22] = { + "UNDEFINED", + "FLOAT", + "UINT8", + "INT8", + "UINT16", + "INT16", + "INT32", + "INT64", + "STRING", + "BOOL", + "FLOAT16", + "DOUBLE", + "UINT32", + "UINT64", + "COMPLEX64", + "COMPLEX128", + "BFLOAT16", + "FLOAT8E4M3FN", + "FLOAT8E4M3FNUZ", + "FLOAT8E5M2", + "FLOAT8E5M2FNUZ", + nullptr}; return names; } -inline const char *EnumNameTensorDataType(TensorDataType e) { +inline const char* EnumNameTensorDataType(TensorDataType e) { if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; const size_t index = static_cast(e); return EnumNamesTensorDataType()[index]; @@ -117,19 +114,19 @@ struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { VT_DATA_TYPE = 8, VT_RAW_DATA = 10 }; - const ::flatbuffers::String *name() const { - return GetPointer(VT_NAME); + const ::flatbuffers::String* name() const { + return GetPointer(VT_NAME); } - const ::flatbuffers::Vector *dims() const { - return GetPointer *>(VT_DIMS); + const ::flatbuffers::Vector* dims() const { + return GetPointer*>(VT_DIMS); } onnxruntime::lora::TensorDataType data_type() const { return static_cast(GetField(VT_DATA_TYPE, 0)); } - const ::flatbuffers::Vector *raw_data() const { - return GetPointer *>(VT_RAW_DATA); + const ::flatbuffers::Vector* raw_data() const { + return GetPointer*>(VT_RAW_DATA); } - bool Verify(::flatbuffers::Verifier &verifier) const { + bool Verify(::flatbuffers::Verifier& verifier) const { return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && verifier.VerifyString(name()) && @@ -144,7 +141,7 @@ struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { struct ParameterBuilder { typedef Parameter Table; - ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::FlatBufferBuilder& fbb_; ::flatbuffers::uoffset_t start_; void 
add_name(::flatbuffers::Offset<::flatbuffers::String> name) { fbb_.AddOffset(Parameter::VT_NAME, name); @@ -158,8 +155,8 @@ struct ParameterBuilder { void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); } - explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ::flatbuffers::Offset Finish() { @@ -170,7 +167,7 @@ struct ParameterBuilder { }; inline ::flatbuffers::Offset CreateParameter( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, ::flatbuffers::Offset<::flatbuffers::String> name = 0, ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, @@ -184,14 +181,16 @@ inline ::flatbuffers::Offset CreateParameter( } inline ::flatbuffers::Offset CreateParameterDirect( - ::flatbuffers::FlatBufferBuilder &_fbb, - const char *name = nullptr, - const std::vector *dims = nullptr, + ::flatbuffers::FlatBufferBuilder& _fbb, + const char* name = nullptr, + const std::vector* dims = nullptr, onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, - const std::vector *raw_data = nullptr) { + const std::vector* raw_data = nullptr) { auto name__ = name ? _fbb.CreateString(name) : 0; auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; - if (raw_data) { _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); } + if (raw_data) { + _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); + } auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; return onnxruntime::lora::CreateParameter( _fbb, @@ -218,10 +217,10 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { int32_t model_version() const { return GetField(VT_MODEL_VERSION, 0); } - const ::flatbuffers::Vector<::flatbuffers::Offset> *parameters() const { - return GetPointer> *>(VT_PARAMETERS); + const ::flatbuffers::Vector<::flatbuffers::Offset>* parameters() const { + return GetPointer>*>(VT_PARAMETERS); } - bool Verify(::flatbuffers::Verifier &verifier) const { + bool Verify(::flatbuffers::Verifier& verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FORMAT_VERSION, 4) && VerifyField(verifier, VT_ADAPTER_VERSION, 4) && @@ -235,7 +234,7 @@ struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { struct AdapterBuilder { typedef Adapter Table; - ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::FlatBufferBuilder& fbb_; ::flatbuffers::uoffset_t start_; void add_format_version(int32_t format_version) { fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); @@ -249,8 +248,8 @@ struct AdapterBuilder { void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); } - explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ::flatbuffers::Offset Finish() { @@ -261,7 +260,7 @@ struct AdapterBuilder { }; inline ::flatbuffers::Offset CreateAdapter( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, @@ -275,11 +274,11 @@ inline ::flatbuffers::Offset CreateAdapter( } inline ::flatbuffers::Offset CreateAdapterDirect( - ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t 
format_version = 0, int32_t adapter_version = 0, int32_t model_version = 0, - const std::vector<::flatbuffers::Offset> *parameters = nullptr) { + const std::vector<::flatbuffers::Offset>* parameters = nullptr) { auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; return onnxruntime::lora::CreateAdapter( _fbb, @@ -289,46 +288,46 @@ inline ::flatbuffers::Offset CreateAdapterDirect( parameters__); } -inline const onnxruntime::lora::Adapter *GetAdapter(const void *buf) { +inline const onnxruntime::lora::Adapter* GetAdapter(const void* buf) { return ::flatbuffers::GetRoot(buf); } -inline const onnxruntime::lora::Adapter *GetSizePrefixedAdapter(const void *buf) { +inline const onnxruntime::lora::Adapter* GetSizePrefixedAdapter(const void* buf) { return ::flatbuffers::GetSizePrefixedRoot(buf); } -inline const char *AdapterIdentifier() { +inline const char* AdapterIdentifier() { return "GAIL"; } -inline bool AdapterBufferHasIdentifier(const void *buf) { +inline bool AdapterBufferHasIdentifier(const void* buf) { return ::flatbuffers::BufferHasIdentifier( buf, AdapterIdentifier()); } -inline bool SizePrefixedAdapterBufferHasIdentifier(const void *buf) { +inline bool SizePrefixedAdapterBufferHasIdentifier(const void* buf) { return ::flatbuffers::BufferHasIdentifier( buf, AdapterIdentifier(), true); } inline bool VerifyAdapterBuffer( - ::flatbuffers::Verifier &verifier) { + ::flatbuffers::Verifier& verifier) { return verifier.VerifyBuffer(AdapterIdentifier()); } inline bool VerifySizePrefixedAdapterBuffer( - ::flatbuffers::Verifier &verifier) { + ::flatbuffers::Verifier& verifier) { return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); } inline void FinishAdapterBuffer( - ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::FlatBufferBuilder& fbb, ::flatbuffers::Offset root) { fbb.Finish(root, AdapterIdentifier()); } inline void FinishSizePrefixedAdapterBuffer( - ::flatbuffers::FlatBufferBuilder &fbb, + 
::flatbuffers::FlatBufferBuilder& fbb, ::flatbuffers::Offset root) { fbb.FinishSizePrefixed(root, AdapterIdentifier()); } diff --git a/onnxruntime/python/convert_npz_to_onnx_adapter.py b/onnxruntime/python/convert_npz_to_onnx_adapter.py index ba9f3aa8802f4..94bfe69e34cf3 100644 --- a/onnxruntime/python/convert_npz_to_onnx_adapter.py +++ b/onnxruntime/python/convert_npz_to_onnx_adapter.py @@ -4,12 +4,13 @@ # This script helps converting .npz files to .onnx_adapter files import argparse -import json +import os +import sys + import numpy as np + import onnxruntime as ort -import os -import sys def get_args() -> argparse: parser = argparse.ArgumentParser() @@ -20,11 +21,10 @@ def get_args() -> argparse: return parser.parse_args() -def export_lora_parameters(npz_file_path : os.PathLike, - adapter_version: int, model_version: int, - output_file_path : os.PathLike): - '''The function converts lora parameters in npz to onnx_adapter format - ''' +def export_lora_parameters( + npz_file_path: os.PathLike, adapter_version: int, model_version: int, output_file_path: os.PathLike +): + """The function converts lora parameters in npz to onnx_adapter format""" adapter_format = ort.AdapterFormat() adapter_format.set_adapter_version(adapter_version) adapter_format.set_model_version(model_version) @@ -37,11 +37,12 @@ def export_lora_parameters(npz_file_path : os.PathLike, adapter_format.set_parameters(name_to_ort_value) adapter_format.export_adapter(output_file_path) + def main() -> int: args = get_args() - export_lora_parameters(args.npz_file_path, args.adapter_version, - args.model_version, args.output_file_path) + export_lora_parameters(args.npz_file_path, args.adapter_version, args.model_version, args.output_file_path) return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 41647e8ec3034..c99236498e5b6 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ 
b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -144,8 +144,7 @@ void addAdapterFormatMethods(pybind11::module& m) { py::class_ lora_adapter_binding(m, "LoraAdapter"); lora_adapter_binding.def(py::init()) - .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, - R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 2217b95161425..9308075fe52a5 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4403,7 +4403,6 @@ TEST(CApiTest, RunAsyncFail) { } TEST(CApiTest, RunWithLoraAdapter) { - constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx"); constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); @@ -4417,13 +4416,13 @@ TEST(CApiTest, RunWithLoraAdapter) { constexpr const std::array input_shape = {4, 4}; std::vector input_x(16); std::fill(input_x.begin(), input_x.end(), 1.0f); - constexpr const char* input_names[] = { "input_x" }; + constexpr const char* input_names[] = {"input_x"}; constexpr const char* output_names[] = {"output"}; auto cpu_meminfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); auto input_x_val = Ort::Value::CreateTensor( - cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size()); + cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size()); Ort::Value inputs[] = {std::move(input_x_val)}; diff --git a/onnxruntime/test/testdata/lora/two_params_lora_model.py b/onnxruntime/test/testdata/lora/two_params_lora_model.py index ab88e52434b03..8e51c4395f67e 100644 --- 
a/onnxruntime/test/testdata/lora/two_params_lora_model.py +++ b/onnxruntime/test/testdata/lora/two_params_lora_model.py @@ -1,11 +1,14 @@ -import onnx +import os + import numpy as np +import onnx + import onnxruntime as ort -import os model_path = "C:/dev/ort_main/onnxruntime/test/testdata/lora/two_params_lora_model.onnx" adapter_path = "C:/dev/ort_main/onnxruntime/test/testdata/lora/two_params_lora_model.onnx_adapter" + def create_model(model_path: os.PathLike): #### Inputs # original input_x and its associated weight @@ -20,24 +23,24 @@ def create_model(model_path: os.PathLike): #### Initializers # Base weight tensor proto - weight_x = np.array([1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16]).reshape(4, 4).astype(np.float32) - weight_x_tensor = onnx.helper.make_tensor("weight_x", - onnx.TensorProto.FLOAT, [4, 4], weight_x.flatten()) + weight_x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]).reshape(4, 4).astype(np.float32) + weight_x_tensor = onnx.helper.make_tensor("weight_x", onnx.TensorProto.FLOAT, [4, 4], weight_x.flatten()) # tensor proto for default lora parameter A lora_weight_a = np.zeros([4, 0], dtype=np.float32) - lora_weight_a_tensor = onnx.helper.make_tensor("lora_param_a", - onnx.TensorProto.FLOAT, [4, 0], lora_weight_a.flatten()) + lora_weight_a_tensor = onnx.helper.make_tensor( + "lora_param_a", onnx.TensorProto.FLOAT, [4, 0], lora_weight_a.flatten() + ) # tensor proto for default lora parameter B lora_weight_b = np.zeros([0, 4], dtype=np.float32) - lora_weight_b_tensor = onnx.helper.make_tensor("lora_param_b", - onnx.TensorProto.FLOAT, [0, 4], lora_weight_b.flatten()) + lora_weight_b_tensor = onnx.helper.make_tensor( + "lora_param_b", onnx.TensorProto.FLOAT, [0, 4], lora_weight_b.flatten() + ) ##### Linear nodes # Create matmul for base case - matmul_x = onnx.helper.make_node("MatMul", ["input_x", "weight_x"], ["mm_output_x"]); + matmul_x = onnx.helper.make_node("MatMul", ["input_x", "weight_x"], ["mm_output_x"]) # 
create matmul node for lora_param_a matmul_a = onnx.helper.make_node("MatMul", ["input_x", "lora_param_a"], ["mm_output_a"]) # Create matmul for lora_param_b @@ -51,15 +54,17 @@ def create_model(model_path: os.PathLike): nodes=[matmul_x, matmul_a, matmul_b, add_node], inputs=[input_x, lora_param_a_input, lora_param_b_input], outputs=[output], - initializer=[weight_x_tensor, lora_weight_a_tensor, lora_weight_b_tensor]) + initializer=[weight_x_tensor, lora_weight_a_tensor, lora_weight_b_tensor], + ) # create a model model = onnx.helper.make_model(graph) - #onnx.checker.check_model(model, full_check=True) + # onnx.checker.check_model(model, full_check=True) onnx.save_model(model, model_path) + def create_adapter(adapter_path: os.PathLike): """ Creates an test adapter for the model above @@ -77,10 +82,7 @@ def create_adapter(adapter_path: os.PathLike): print(param_a) print(param_b) - name_to_value = { - "lora_param_a" : ort_value_a, - "lora_param_b" : ort_value_b - } + name_to_value = {"lora_param_a": ort_value_a, "lora_param_b": ort_value_b} adapter_format = ort.AdapterFormat() adapter_format.set_adapter_version(1) @@ -88,7 +90,8 @@ def create_adapter(adapter_path: os.PathLike): adapter_format.set_parameters(name_to_value) adapter_format.export_adapter(adapter_path) -def read_adapter(adapter_path: os.PathLike) : + +def read_adapter(adapter_path: os.PathLike): adapter = ort.AdapterFormat.read_adapter(adapter_path) params = adapter.get_parameters() @@ -101,29 +104,30 @@ def read_adapter(adapter_path: os.PathLike) : numpy_b = params["lora_param_b"].numpy() print(numpy_b) + def run_base_model(model_path: os.PathLike): session = ort.InferenceSession(model_path) # Run the base case - inputs = { - "input_x": np.ones((4, 4), dtype=np.float32) - } + inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} outputs = session.run(None, inputs) print(outputs) + def run_with_override(model_path: os.PathLike): session = ort.InferenceSession(model_path) inputs = { "input_x": 
np.ones((4, 4), dtype=np.float32), - "lora_param_a" : np.array([3, 4, 5, 6]).astype(np.float32).reshape(4, 1), - "lora_param_b" : np.array([7, 8, 9, 10]).astype(np.float32).reshape(1, 4) + "lora_param_a": np.array([3, 4, 5, 6]).astype(np.float32).reshape(4, 1), + "lora_param_b": np.array([7, 8, 9, 10]).astype(np.float32).reshape(1, 4), } outputs = session.run(None, inputs) print(outputs) + def run_with_adapter(model_path: os.PathLike, adapter_path: os.PathLike): adapter = ort.LoraAdapter() adapter.Load(adapter_path) @@ -133,17 +137,16 @@ def run_with_adapter(model_path: os.PathLike, adapter_path: os.PathLike): session = ort.InferenceSession(model_path) - inputs = { - "input_x": np.ones((4, 4), dtype=np.float32) - } + inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} outputs = session.run(None, inputs, run_options) print(outputs) - + + if __name__ == "__main__": - #create_model(model_path) - #run_base_model(model_path) + # create_model(model_path) + # run_base_model(model_path) run_with_override(model_path) - #create_adapter(adapter_path) - #read_adapter(adapter_path) - run_with_adapter(model_path, adapter_path) \ No newline at end of file + # create_adapter(adapter_path) + # read_adapter(adapter_path) + run_with_adapter(model_path, adapter_path) From 71b3bbe682339256fa094478ea5231864bf0acba Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 12:11:02 -0700 Subject: [PATCH 53/84] Fix stray windows specific declarations --- onnxruntime/core/session/onnxruntime_c_api.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 31482f4588896..69cc55d0a4a6f 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2824,7 +2824,7 @@ ORT_API(const char*, OrtApis::GetVersionString) { return ORT_VERSION; } -const char* _stdcall OrtApis::GetBuildInfoString() noexcept { +const char* 
ORT_API_CALL OrtApis::GetBuildInfoString() noexcept { return ORT_BUILD_INFO; } @@ -2837,8 +2837,6 @@ ORT_API(void, OrtApis::ReleaseEnv, OrtEnv* value) { } DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Value, OrtValue) -void _stdcall OrtApis::ReleaseRunOptions(OrtRunOptions* value) noexcept { - delete reinterpret_cast(value); -} +DEFINE_RELEASE_ORT_OBJECT_FUNCTION(RunOptions, OrtRunOptions) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(Session, ::onnxruntime::InferenceSession) DEFINE_RELEASE_ORT_OBJECT_FUNCTION(ModelMetadata, ::onnxruntime::ModelMetadata) From 9cf6649818bbeee332ff1ea118cde0f62a1de361 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 13:12:29 -0700 Subject: [PATCH 54/84] Remove old format files --- onnxruntime/lora/lora_format/README.md | 36 -- .../lora/lora_format/compile_schema.py | 55 --- onnxruntime/lora/lora_format/lora_schema.fbs | 51 --- .../lora/lora_format/lora_schema.fbs.h | 338 ------------------ onnxruntime/lora/lora_format_utils.cc | 158 -------- onnxruntime/lora/lora_format_utils.h | 138 ------- onnxruntime/lora/lora_format_version.h | 33 -- onnxruntime/python/onnxruntime_pybind_lora.cc | 3 +- 8 files changed, 2 insertions(+), 810 deletions(-) delete mode 100644 onnxruntime/lora/lora_format/README.md delete mode 100644 onnxruntime/lora/lora_format/compile_schema.py delete mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs delete mode 100644 onnxruntime/lora/lora_format/lora_schema.fbs.h delete mode 100644 onnxruntime/lora/lora_format_utils.cc delete mode 100644 onnxruntime/lora/lora_format_utils.h delete mode 100644 onnxruntime/lora/lora_format_version.h diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md deleted file mode 100644 index d28f47186cbea..0000000000000 --- a/onnxruntime/lora/lora_format/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Lora Parameters Flatbuffer Schemas -This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header 
file](lora_schema.fbs.h) for the -Lora Parameters file format. This file format is defined as means to deliver Lora parameters so it can read by ONNXRuntime C++ code. - -The format format is generally designed to house a single Lora adapter named Lora parameters. - -[ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library. - -Please do not directly modify the generated C++ header file for [ONNXRuntime Lora Parameter file format]((lora_schema.fbs.h)). - -Use flatc compiler for the purpose. - -e.g. - - Windows Debug build - - \build\Windows\Debug\_deps\flatbuffers-build\Debug\flatc.exe - - Linux Debug build - - /build/Linux/Debug/_deps/flatbuffers-build/flatc - -It is possible to use another flatc as well, e.g., from a separate installation. - -To update the flatbuffers schemas and generated files: -1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). -2. Run [compile_schema.py](./compile_schema.py) to generate the C++ bindings. - - ``` - python onnxruntime/lora/lora_format/compile_schema.py --flatc - ``` -# Lora format version history -In [lora_format_version.h](../lora_format_version.h), see `IsLoraParameterslVersionSupported()` for the supported versions and -`kLoraParametersVersion` for the current version. - -## Version 1 -History begins. - -Initial support for FlatBuffers that Lora Parameters support. This includes a definition of Tensor entity -so it can be saved in a tensor per file format. diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py deleted file mode 100644 index f98db367ae83b..0000000000000 --- a/onnxruntime/lora/lora_format/compile_schema.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- -import argparse -import pathlib -import subprocess - -SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() - - -def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): - # run flatc to generate C++ code - cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] - subprocess.run(cmd, check=True, cwd=SCRIPT_DIR) - - -def main(): - parser = argparse.ArgumentParser( - description="Generate language bindings for the ORT flatbuffers schema.", - usage="Provide the path to the flatbuffers flatc executable. " - "Script can be executed from anywhere but must be located in its original " - "directory in the ONNX Runtime enlistment.", - ) - - parser.add_argument( - "-f", - "--flatc", - required=True, - type=pathlib.Path, - help="Path to flatbuffers flatc executable. " - "Can be found in the build directory under _deps/flatbuffers-build//", - ) - - all_languages = ["cpp"] - parser.add_argument( - "-l", - "--language", - action="append", - dest="languages", - choices=all_languages, - help="Specify which language bindings to generate.", - ) - - args = parser.parse_args() - languages = args.languages if args.languages is not None else all_languages - flatc = args.flatc.resolve(strict=True) - schema_path = SCRIPT_DIR / "lora_schema.fbs" - - if "cpp" in languages: - generate_cpp(flatc, schema_path) - - -if __name__ == "__main__": - main() diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs deleted file mode 100644 index 37e8195dab6f2..0000000000000 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -namespace onnxruntime.lora; - -// Tensor -enum TensorDataType : int32 { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, -} - -// For simplicity, we will have only have one data field -// - raw_data for all primitive types. -// We do not foresee strings as parameters. -table Parameter { - name:string; - - dims:[int64]; - data_type:TensorDataType; - - raw_data:[uint8] (force_align : 8); -} - -table Adapter { - format_version:int; - adapter_version:int; - model_version:int; - parameters:[Parameter]; -} - -root_type Adapter; -file_identifier "GAIL"; diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h deleted file mode 100644 index 097528d854bf8..0000000000000 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ /dev/null @@ -1,338 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ -#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ - -#include "flatbuffers/flatbuffers.h" - -// Ensure the included flatbuffers.h is the same version as when this file was -// generated, otherwise it may not be compatible. 
-static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && - FLATBUFFERS_VERSION_MINOR == 5 && - FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - -namespace onnxruntime { -namespace lora { - -struct Parameter; -struct ParameterBuilder; - -struct Adapter; -struct AdapterBuilder; - -enum class TensorDataType : int32_t { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, - MIN = UNDEFINED, - MAX = FLOAT8E5M2FNUZ -}; - -inline const TensorDataType (&EnumValuesTensorDataType())[21] { - static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ}; - return values; -} - -inline const char* const* EnumNamesTensorDataType() { - static const char* const names[22] = { - "UNDEFINED", - "FLOAT", - "UINT8", - "INT8", - "UINT16", - "INT16", - "INT32", - "INT64", - "STRING", - "BOOL", - "FLOAT16", - "DOUBLE", - "UINT32", - "UINT64", - "COMPLEX64", - "COMPLEX128", - "BFLOAT16", - "FLOAT8E4M3FN", - "FLOAT8E4M3FNUZ", - "FLOAT8E5M2", - "FLOAT8E5M2FNUZ", - nullptr}; - return names; -} - -inline const char* EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, 
TensorDataType::FLOAT8E5M2FNUZ)) return ""; - const size_t index = static_cast(e); - return EnumNamesTensorDataType()[index]; -} - -struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { - typedef ParameterBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_NAME = 4, - VT_DIMS = 6, - VT_DATA_TYPE = 8, - VT_RAW_DATA = 10 - }; - const ::flatbuffers::String* name() const { - return GetPointer(VT_NAME); - } - const ::flatbuffers::Vector* dims() const { - return GetPointer*>(VT_DIMS); - } - onnxruntime::lora::TensorDataType data_type() const { - return static_cast(GetField(VT_DATA_TYPE, 0)); - } - const ::flatbuffers::Vector* raw_data() const { - return GetPointer*>(VT_RAW_DATA); - } - bool Verify(::flatbuffers::Verifier& verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_NAME) && - verifier.VerifyString(name()) && - VerifyOffset(verifier, VT_DIMS) && - verifier.VerifyVector(dims()) && - VerifyField(verifier, VT_DATA_TYPE, 4) && - VerifyOffset(verifier, VT_RAW_DATA) && - verifier.VerifyVector(raw_data()) && - verifier.EndTable(); - } -}; - -struct ParameterBuilder { - typedef Parameter Table; - ::flatbuffers::FlatBufferBuilder& fbb_; - ::flatbuffers::uoffset_t start_; - void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { - fbb_.AddOffset(Parameter::VT_NAME, name); - } - void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { - fbb_.AddOffset(Parameter::VT_DIMS, dims); - } - void add_data_type(onnxruntime::lora::TensorDataType data_type) { - fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); - } - void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { - fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); - } - explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = 
::flatbuffers::Offset(end); - return o; - } -}; - -inline ::flatbuffers::Offset CreateParameter( - ::flatbuffers::FlatBufferBuilder& _fbb, - ::flatbuffers::Offset<::flatbuffers::String> name = 0, - ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, - ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { - ParameterBuilder builder_(_fbb); - builder_.add_raw_data(raw_data); - builder_.add_data_type(data_type); - builder_.add_dims(dims); - builder_.add_name(name); - return builder_.Finish(); -} - -inline ::flatbuffers::Offset CreateParameterDirect( - ::flatbuffers::FlatBufferBuilder& _fbb, - const char* name = nullptr, - const std::vector* dims = nullptr, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, - const std::vector* raw_data = nullptr) { - auto name__ = name ? _fbb.CreateString(name) : 0; - auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; - if (raw_data) { - _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); - } - auto raw_data__ = raw_data ? 
_fbb.CreateVector(*raw_data) : 0; - return onnxruntime::lora::CreateParameter( - _fbb, - name__, - dims__, - data_type, - raw_data__); -} - -struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { - typedef AdapterBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FORMAT_VERSION = 4, - VT_ADAPTER_VERSION = 6, - VT_MODEL_VERSION = 8, - VT_PARAMETERS = 10 - }; - int32_t format_version() const { - return GetField(VT_FORMAT_VERSION, 0); - } - int32_t adapter_version() const { - return GetField(VT_ADAPTER_VERSION, 0); - } - int32_t model_version() const { - return GetField(VT_MODEL_VERSION, 0); - } - const ::flatbuffers::Vector<::flatbuffers::Offset>* parameters() const { - return GetPointer>*>(VT_PARAMETERS); - } - bool Verify(::flatbuffers::Verifier& verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_FORMAT_VERSION, 4) && - VerifyField(verifier, VT_ADAPTER_VERSION, 4) && - VerifyField(verifier, VT_MODEL_VERSION, 4) && - VerifyOffset(verifier, VT_PARAMETERS) && - verifier.VerifyVector(parameters()) && - verifier.VerifyVectorOfTables(parameters()) && - verifier.EndTable(); - } -}; - -struct AdapterBuilder { - typedef Adapter Table; - ::flatbuffers::FlatBufferBuilder& fbb_; - ::flatbuffers::uoffset_t start_; - void add_format_version(int32_t format_version) { - fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); - } - void add_adapter_version(int32_t adapter_version) { - fbb_.AddElement(Adapter::VT_ADAPTER_VERSION, adapter_version, 0); - } - void add_model_version(int32_t model_version) { - fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); - } - void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { - fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); - } - explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ::flatbuffers::Offset Finish() { - const 
auto end = fbb_.EndTable(start_); - auto o = ::flatbuffers::Offset(end); - return o; - } -}; - -inline ::flatbuffers::Offset CreateAdapter( - ::flatbuffers::FlatBufferBuilder& _fbb, - int32_t format_version = 0, - int32_t adapter_version = 0, - int32_t model_version = 0, - ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { - AdapterBuilder builder_(_fbb); - builder_.add_parameters(parameters); - builder_.add_model_version(model_version); - builder_.add_adapter_version(adapter_version); - builder_.add_format_version(format_version); - return builder_.Finish(); -} - -inline ::flatbuffers::Offset CreateAdapterDirect( - ::flatbuffers::FlatBufferBuilder& _fbb, - int32_t format_version = 0, - int32_t adapter_version = 0, - int32_t model_version = 0, - const std::vector<::flatbuffers::Offset>* parameters = nullptr) { - auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; - return onnxruntime::lora::CreateAdapter( - _fbb, - format_version, - adapter_version, - model_version, - parameters__); -} - -inline const onnxruntime::lora::Adapter* GetAdapter(const void* buf) { - return ::flatbuffers::GetRoot(buf); -} - -inline const onnxruntime::lora::Adapter* GetSizePrefixedAdapter(const void* buf) { - return ::flatbuffers::GetSizePrefixedRoot(buf); -} - -inline const char* AdapterIdentifier() { - return "GAIL"; -} - -inline bool AdapterBufferHasIdentifier(const void* buf) { - return ::flatbuffers::BufferHasIdentifier( - buf, AdapterIdentifier()); -} - -inline bool SizePrefixedAdapterBufferHasIdentifier(const void* buf) { - return ::flatbuffers::BufferHasIdentifier( - buf, AdapterIdentifier(), true); -} - -inline bool VerifyAdapterBuffer( - ::flatbuffers::Verifier& verifier) { - return verifier.VerifyBuffer(AdapterIdentifier()); -} - -inline bool VerifySizePrefixedAdapterBuffer( - ::flatbuffers::Verifier& verifier) { - return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); -} - -inline void 
FinishAdapterBuffer( - ::flatbuffers::FlatBufferBuilder& fbb, - ::flatbuffers::Offset root) { - fbb.Finish(root, AdapterIdentifier()); -} - -inline void FinishSizePrefixedAdapterBuffer( - ::flatbuffers::FlatBufferBuilder& fbb, - ::flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root, AdapterIdentifier()); -} - -} // namespace lora -} // namespace onnxruntime - -#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc deleted file mode 100644 index 9a4c1ce6f2415..0000000000000 --- a/onnxruntime/lora/lora_format_utils.cc +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "lora_format_utils.h" -#include "lora_format_version.h" - -#include "core/common/common.h" -#include "core/common/span_utils.h" -#include "core/framework/ortdevice.h" -#include "core/framework/ortmemoryinfo.h" -#include "core/framework/ort_value.h" -#include "core/framework/tensor.h" - -#include - -namespace onnxruntime { -namespace lora { -namespace utils { - -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) { - return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory - AdapterBufferHasIdentifier(bytes); -} - -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src) { - if (has_string) return builder.CreateString(src); - - // If the string does not exist, return 0 (the string does not exist in flatbuffer) - return 0; -} - -void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) { - if (fbs_string) { - dst = fbs_string->str(); - } -} - -std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path) { - Env& env = Env::Default(); - - size_t file_size = 0; - ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); - - 
std::vector result; - result.resize(file_size); - - // The API accepts char span, so we need to reinterpret the uint8_t span as char span - auto dest_span = ReinterpretAsSpan(AsSpan(result)); - ORT_THROW_IF_ERROR(env.ReadFileIntoBuffer(file_path.c_str(), 0, file_size, dest_span)); - - return result; -} - -std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path) { - Env& env = Env::Default(); - - size_t file_size = 0; - ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); - - Env::MappedMemoryPtr result; - ORT_THROW_IF_ERROR(env.MapFileIntoMemory(file_path.c_str(), 0, file_size, result)); - - return {std::move(result), file_size}; -} - -const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { - if (!IsLoraFormatModelBytes(bytes.data(), bytes.size())) { - ORT_THROW("The buffer does not appear to be a valid lora parameter format"); - } - - flatbuffers::Verifier verifier(bytes.data(), bytes.size()); - if (!VerifyAdapterBuffer(verifier)) { - ORT_THROW("The buffer fails lora adapter format verification"); - } - - auto* adapter = GetAdapter(bytes.data()); - if (!IsLoraFormatVersionSupported(adapter->format_version())) { - ORT_THROW("Unsupported lora format version"); - } - - return adapter; -} - -void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, - TensorDataType data_type, gsl::span shape, - gsl::span data, - flatbuffers::Offset& fbs_tensor) { - auto name_str = (name.empty()) ? 
0 : flat_builder.CreateString(name.data(), name.size()); - auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size()); - auto data_vec = flat_builder.CreateVector(data.data(), data.size()); - - fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); -} - -std::pair CreateOrtValueOverLoraParameter(const Parameter& param) { - OrtValue result; - - std::string name; - LoadStringFromLoraFormat(name, param.name()); - - const auto data_type = param.data_type(); - gsl::span shape_span(param.dims()->data(), param.dims()->size()); - - static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); - - auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); - // const_cast is necessery due to Tensor class API - Tensor::InitOrtValue(elem_type, - TensorShape(shape_span), - const_cast(param.raw_data()->data()), - cpu_meminfo, - result); - - return std::make_pair(std::move(name), std::move(result)); -} - -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { - OrtValue result; - - const auto& tensor = ort_value_mapped.Get(); - Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator); - - return result; -} - -void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type, - gsl::span shape, gsl::span data) { - flatbuffers::Offset fbs_param; - SaveLoraParameter(builder_, name, data_type, shape, data, fbs_param); - params_.push_back(fbs_param); -} - -std::vector AdapterFormatBuilder::Finish(int adapter_version, int model_version) { - FinishImpl(adapter_version, model_version); - - std::vector result; - result.reserve(builder_.GetSize()); - gsl::span buffer(builder_.GetBufferPointer(), builder_.GetSize()); - std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); - return result; -} - -gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int model_version) { - 
FinishImpl(adapter_version, model_version); - return gsl::make_span(builder_.GetBufferPointer(), builder_.GetSize()); -} - -void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { - auto fbs_params = builder_.CreateVector(params_); - auto fbs_adapter = lora::CreateAdapter(builder_, lora::kLoraFormatVersion, adapter_version, - model_version, fbs_params); - builder_.Finish(fbs_adapter, lora::AdapterIdentifier()); -} - -} // namespace utils -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h deleted file mode 100644 index e7e341945f2ca..0000000000000 --- a/onnxruntime/lora/lora_format_utils.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include "core/common/flatbuffers.h" -#include "core/framework/allocator.h" -#include "core/platform/env.h" - -#include -#include - -#include "lora_format/lora_schema.fbs.h" - -#include -#include -#include -#include - -struct OrtValue; - -namespace onnxruntime { -namespace lora { -namespace utils { - -///

-/// Helper class to serialize Lora adapter -/// -class AdapterFormatBuilder { - public: - AdapterFormatBuilder() = default; - - /// - /// Appends parameter tensor to the adapter builder - /// - /// parameter name - /// - /// - /// - void AddParameter(const std::string& name, lora::TensorDataType data_type, - gsl::span shape, gsl::span data); - - /// - /// Finishes serialization and returns a serialized byte vector - /// - /// - /// - /// - std::vector Finish(int adapter_version, int model_version); - - /// - /// Finishes serialization and returns a span to internal buffer. - /// - /// - /// - /// - gsl::span FinishWithSpan(int adapter_version, int model_version); - - private: - void FinishImpl(int adapter_version, int model_version); - - flatbuffers::FlatBufferBuilder builder_; - std::vector> params_; -}; - -/// -/// -/// -/// -/// -/// -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes); - -// Will only create string in flatbuffers when has_string is true -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src); - -void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string); - -/// -/// The function loads the lora adapter bytes from the file system -/// -/// file path -/// bytes in a vector -/// If the path can not be found -std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path); - -/// -/// This function memory maps the adapter file in memory -/// -/// -/// memory handle and file size in a tuple -std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); - -/// -/// Validates underlying format and the format version -/// -/// -/// Adapter ptr -const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes); - -/// -/// Serializes tensor data into flatbuffer -/// -/// -/// parameter name -/// doc, optional -/// -/// -/// -/// output offset -void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, 
std::string_view name, - lora::TensorDataType data_type, - gsl::span shape, gsl::span data, - flatbuffers::Offset& fbs_tensor); - -/// -/// Create an OrtValue on top of the flatbuffer tensor -/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer -/// structures. -/// -/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying. -/// -/// -/// -std::pair CreateOrtValueOverLoraParameter(const Parameter& param); - -/// -/// Allocates OrtValue on specified device and copies data there -/// -/// parameter on CPU -/// supplied device allocator -/// -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); - -} // namespace utils -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h deleted file mode 100644 index 9c90a86b16382..0000000000000 --- a/onnxruntime/lora/lora_format_version.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include -#include - -namespace onnxruntime { -namespace lora { - -// The current model versions for saving lora parameters in flatbuffers -// Once this version is updated, the kSupportedLoraFormatVersions in IsGenAiLoraFormatModelBytes -// below will also need to be updated. -// See src/flatbuffers/schema/README.md for more details on versioning. 
-// Version 1 - history begins -constexpr const int kLoraFormatVersion = 1; - -// Check if the given lora format version is supported in this build -inline bool IsLoraFormatVersionSupported(const int lora_format_version) { - // The lora format versions we will support in this build - // This may contain more versions than the kLoraFormatVersion, based on the compatibilities - static constexpr std::array kSupportedLoraFormatVersions{ - kLoraFormatVersion, - }; - - const auto it = - std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version); - return it != kSupportedLoraFormatVersions.cend(); -} - -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index c99236498e5b6..72c92abcf0539 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -144,7 +144,8 @@ void addAdapterFormatMethods(pybind11::module& m) { py::class_ lora_adapter_binding(m, "LoraAdapter"); lora_adapter_binding.def(py::init()) - .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, + R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python From 30f3e6374dd6c8b8ab23747d85e9a26b779794e7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 14:08:29 -0700 Subject: [PATCH 55/84] Address build issues --- cmake/onnxruntime_session.cmake | 2 +- onnxruntime/python/onnxruntime_pybind_lora.cc | 3 +-- onnxruntime/python/onnxruntime_pybind_mlvalue.cc | 6 ++++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index bb662f8ecd68c..47cf2dfc5e7aa 100644 --- 
a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -30,7 +30,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) -onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_lora onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) +onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnxruntime_lora onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) target_link_libraries(onnxruntime_session PRIVATE onnxruntime_lora) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 72c92abcf0539..099b05800905a 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -144,8 +144,7 @@ void addAdapterFormatMethods(pybind11::module& m) { py::class_ lora_adapter_binding(m, "LoraAdapter"); lora_adapter_binding.def(py::init()) - .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, - R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index 010039e2e8417..6ed4c42bd4304 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -87,13 +87,15 @@ static TensorShape 
GetArrayShape(PyArrayObject* pyObject) { const int ndim = PyArray_NDIM(pyObject); const npy_intp* npy_dims = PyArray_DIMS(pyObject); auto span = gsl::make_span(npy_dims, ndim); - TensorShape shape(span); + TensorShapeVector shape_vec(span.begin(), span.end()); + TensorShape shape(shape_vec); return shape; } TensorShape GetShape(const py::array& arr) { auto span = gsl::make_span(arr.shape(), arr.ndim()); - TensorShape shape(span); + TensorShapeVector shape_vec(span.begin(), span.end()); + TensorShape shape(shape_vec); return shape; } From 583f976e1317593eddaa3ed34116128396493544 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Sep 2024 16:23:53 -0700 Subject: [PATCH 56/84] Adjust linkage, fix build --- cmake/onnxruntime_unittests.cmake | 6 +++--- onnxruntime/python/onnxruntime_pybind_ortvalue.cc | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 77962704d4bc7..87cd9e64e778d 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1242,7 +1242,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. 
set(onnxruntime_perf_test_libs onnx_test_runner_common onnxruntime_test_utils onnxruntime_common - onnxruntime onnxruntime_lora onnxruntime_flatbuffers onnx_test_data_proto + onnxruntime onnxruntime_flatbuffers onnx_test_data_proto ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) if(NOT WIN32) @@ -1255,7 +1255,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) list(APPEND onnxruntime_perf_test_libs ${android_shared_libs}) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_lora onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) + list(APPEND onnxruntime_perf_test_libs onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2 gtest absl_failure_signal_handler absl_examine_stack absl_flags_parse absl_flags_usage absl_flags_usage_internal) endif() target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads) if(WIN32) @@ -1339,7 +1339,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") - list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_lora onnxruntime_flatbuffers iconv re2) + list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers iconv re2) endif() AddTest(DYN diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc 
b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index ae41ae68bde0c..4b9b4c61e820c 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -160,8 +160,7 @@ void addOrtValueMethods(pybind11::module& m) { auto cpu_allocator = GetAllocator(); auto ort_value = std::make_unique(); - const auto shape_span = gsl::make_span(data.shape(), data.ndim()); - Tensor::InitOrtValue(element_type, TensorShape{shape_span}, + Tensor::InitOrtValue(element_type, GetShape(data), const_cast(data.data()), cpu_allocator->Info(), *ort_value); return ort_value; }) From b670bb9ed2adaacc9cb68fcedfd1d7222b914e2b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 17 Sep 2024 16:48:48 -0700 Subject: [PATCH 57/84] Implement CUDA device parameter copies --- cmake/onnxruntime_lora.cmake | 4 +++ cmake/onnxruntime_python.cmake | 4 +-- onnxruntime/lora/adapter_format_utils.cc | 31 +++++++++++++++--- onnxruntime/lora/lora_adapters.cc | 21 +++++++++--- onnxruntime/lora/lora_adapters.h | 2 +- onnxruntime/test/lora/lora_test.cc | 41 +++++++++++++++++++++++- 6 files changed, 90 insertions(+), 13 deletions(-) diff --git a/cmake/onnxruntime_lora.cmake b/cmake/onnxruntime_lora.cmake index 7ba48454d997e..a2d5e4e2849cc 100644 --- a/cmake/onnxruntime_lora.cmake +++ b/cmake/onnxruntime_lora.cmake @@ -13,6 +13,10 @@ onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs}) onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers Boost::mp11 ${GSL_TARGET}) target_link_libraries(onnxruntime_lora onnxruntime_framework) +if(onnxruntime_USE_CUDA) + target_link_libraries(onnxruntime_lora CUDA::cudart) +endif() + if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_lora PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 90afcd4d83e18..7708debf9ff5b 100644 --- a/cmake/onnxruntime_python.cmake +++ 
b/cmake/onnxruntime_python.cmake @@ -71,9 +71,7 @@ onnxruntime_add_shared_library_module(onnxruntime_pybind11_state ${onnxruntime_p if(MSVC) target_compile_options(onnxruntime_pybind11_state PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") - if(onnxruntime_ENABLE_TRAINING) - target_compile_options(onnxruntime_pybind11_state PRIVATE "/bigobj") - endif() + target_compile_options(onnxruntime_pybind11_state PRIVATE "/bigobj") endif() if(HAS_CAST_FUNCTION_TYPE) target_compile_options(onnxruntime_pybind11_state PRIVATE "-Wno-cast-function-type") diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 19781e3edf8e7..68aa10de46bb4 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -4,6 +4,7 @@ #include "adapter_format_utils.h" #include "adapter_format_version.h" +#include "core/framework/allocator.h" #include "core/common/common.h" #include "core/common/span_utils.h" #include "core/framework/ortdevice.h" @@ -11,6 +12,10 @@ #include "core/framework/ort_value.h" #include "core/framework/tensor.h" +#ifdef USE_CUDA +#include "cuda_runtime.h" +#endif + #include namespace onnxruntime { @@ -115,12 +120,30 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter return std::make_pair(std::move(name), std::move(result)); } +// XXX: Figure out how to implement DML copy. +static void CopyOnDevice(const Tensor& src, Tensor& dst) { + const auto& mem_info = dst.Location(); + + if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { +#ifdef USE_CUDA + auto ret = cudaMemcpy(dst.MutableDataRaw(), src.DataRaw(), src.SizeInBytes(), cudaMemcpyHostToDevice); + if (ret != cudaSuccess) { + ORT_THROW("cudaMemcpy failed. 
Return code: ", ret); + } +#else + ORT_NOT_IMPLEMENTED("Destination provider not available, copy failed"); +#endif + } else { + ORT_NOT_IMPLEMENTED("Destination device is currently not supported"); + } +} + OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { OrtValue result; - - const auto& tensor = ort_value_mapped.Get(); - Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator); - + const auto& src = ort_value_mapped.Get(); + Tensor on_device(src.DataType(), src.Shape(), device_allocator); + CopyOnDevice(src, on_device); + Tensor::InitOrtValue(std::move(on_device), result); return result; } diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index e400491769b01..b5a105b4dcc80 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -5,6 +5,7 @@ #include "adapter_format_utils.h" #include "core/session/onnxruntime_c_api.h" +#include "core/session/allocator_adapters.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" @@ -51,8 +52,13 @@ void LoraAdapter::InitializeParamsValues() { params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); - Param lora_param(std::move(ort_value)); - params_values.emplace(std::move(name), std::move(lora_param)); + if (device_allocator_) { + auto ort_value_ondevice = adapters::utils::CreateOrtValueOnDevice(ort_value, device_allocator_); + Param lora_param(std::move(ort_value), std::move(ort_value_ondevice)); + params_values.emplace(std::move(name), std::move(lora_param)); + } else { + Param lora_param(std::move(ort_value)); + } } params_values_.swap(params_values); } @@ -69,10 +75,17 @@ size_t LoraAdapter::GetBufferSize() const { } // namespace lora } // namespace onnxruntime -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ 
OrtAllocator* /* allocator */, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN - auto lora_adapter = std::make_unique(); + + std::unique_ptr lora_adapter; + if (allocator != nullptr) { + auto alloc_ptr = std::make_shared(allocator); + lora_adapter = std::make_unique(std::move(alloc_ptr)); + } else { + lora_adapter = std::make_unique(); + } // For platforms that do not support Memmap, we can #ifdef it to ->Load(adapter_file_path) lora_adapter->MemoryMap(adapter_file_path); *adapter = reinterpret_cast(lora_adapter.release()); diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index d8b5e56d5f861..869fbf89ea756 100644 --- a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -144,7 +144,7 @@ class LoraAdapter { for (const auto& [name, param] : params_values_) { *names_out = name.c_str(); ++names_out; - *tensor_out = ¶m.GetMapped(); + *tensor_out = ¶m.GetDeviceOrMapped(); ++tensor_out; } } diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index c5f9138f280dc..57bf51c59f64e 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -5,13 +5,17 @@ #include "core/framework/data_types_internal.h" #include "core/framework/to_tensor_proto_element_type.h" +#include "test/util/include/default_providers.h" + #include "lora/lora_adapters.h" #include "lora/adapter_format_version.h" #include "lora/adapter_format_utils.h" -#include "gtest/gtest.h" +#include "gtest/gtest.h" #include +#include "test/util/include/asserts.h" + namespace onnxruntime { namespace test { @@ -188,5 +192,40 @@ TEST(LoraAdapterTest, Load) { } } +#ifdef USE_CUDA +TEST(LoraAdapterTest, VerifyDeviceCopy) { + auto cpu_ep = DefaultCpuExecutionProvider(); + auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; + auto cuda_ep = DefaultCudaExecutionProvider(); + auto 
cuda_allocator = cuda_ep->CreatePreferredAllocators()[0]; + + auto gpu_transfer = cuda_ep->GetDataTransfer(); + + auto test_params = GenerateTestParameters()(); + lora::LoraAdapter adapter(std::move(cuda_allocator)); + adapter.Load(std::move(test_params)); + + auto [begin, end] = adapter.GetParamIterators(); + for (; begin != end; ++begin) { + const auto& [_, param] = *begin; + const auto& tensor_device = param.GetDeviceOrMapped().Get(); + ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::CUDA)); + + const auto& tensor_cpu = param.GetMapped().Get(); + ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size()); + + Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator); + ASSERT_TRUE(gpu_transfer->CanCopy(tensor_device.Location().device, + copy.Location().device)); + ASSERT_STATUS_OK(gpu_transfer->CopyTensor(tensor_device, copy)); + + auto expected_span = tensor_cpu.DataAsSpan(); + auto copy_span = copy.DataAsSpan(); + + ASSERT_EQ(expected_span, copy_span); + } +} +#endif + } // namespace test } // namespace onnxruntime From 3d48a6ddd2376898ed44572f6b10ea07d7641c91 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 20 Sep 2024 14:39:47 -0700 Subject: [PATCH 58/84] Address review comments --- onnxruntime/python/onnxruntime_pybind_state.cc | 1 - onnxruntime/test/testdata/lora/two_params_lora_model.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 757f2f6fa67b4..73d0de4288bf2 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -2041,7 +2041,6 @@ including arg name, arg type (contains both type and shape).)pbdoc") auto [begin, end] = adapter->GetParamIterators(); for (; begin != end; ++begin) { const auto& [name, param] = *begin; - std::cout << name << ':'; feeds.insert(std::make_pair(name, param.GetMapped())); } } diff --git 
a/onnxruntime/test/testdata/lora/two_params_lora_model.py b/onnxruntime/test/testdata/lora/two_params_lora_model.py index 8e51c4395f67e..bb3c58f1f63a0 100644 --- a/onnxruntime/test/testdata/lora/two_params_lora_model.py +++ b/onnxruntime/test/testdata/lora/two_params_lora_model.py @@ -5,8 +5,8 @@ import onnxruntime as ort -model_path = "C:/dev/ort_main/onnxruntime/test/testdata/lora/two_params_lora_model.onnx" -adapter_path = "C:/dev/ort_main/onnxruntime/test/testdata/lora/two_params_lora_model.onnx_adapter" +model_path = "two_params_lora_model.onnx" +adapter_path = "two_params_lora_model.onnx_adapter" def create_model(model_path: os.PathLike): From 0d67e2fd6bd71e86c0c0424e050ca08a0a2cf017 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 20 Sep 2024 15:26:08 -0700 Subject: [PATCH 59/84] Add adapter test --- .../test/python/onnxruntime_test_python.py | 28 +++++++++++++++++++ .../testdata/lora/two_params_lora_model.py | 1 + 2 files changed, 29 insertions(+) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index b82ea3514e741..b633976f4953b 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1870,6 +1870,34 @@ def test_adater_export_read(self): self.assertEqual(expected_val.shape(), value.shape()) np.testing.assert_allclose(expected_val.numpy(), value.numpy()) + def test_run_with_adapter(self): + adapter_path = get_name("lora/two_params_lora_model.onnx_adapter") + model_path = get_name("lora/two_params_lora_model.onnx") + + expected_output = np.array( + [ + [154.0, 176.0, 198.0, 220.0], + [154.0, 176.0, 198.0, 220.0], + [154.0, 176.0, 198.0, 220.0], + [154.0, 176.0, 198.0, 220.0], + ], + dtype=np.float32, + ) + + adapter = onnxrt.LoraAdapter() + adapter.Load(adapter_path) + + run_options = onnxrt.RunOptions() + run_options.set_adapter_active(adapter) + session = onnxrt.InferenceSession(model_path) + + inputs = {"input_x": 
np.ones((4, 4), dtype=np.float32)} + + outputs = session.run(None, inputs, run_options) + print("Run with adapter output:") + self.assertEqual(len(outputs), 1) + self.assertTrue(np.allclose(outputs[0], expected_output)) + if __name__ == "__main__": unittest.main(verbosity=1) diff --git a/onnxruntime/test/testdata/lora/two_params_lora_model.py b/onnxruntime/test/testdata/lora/two_params_lora_model.py index bb3c58f1f63a0..8f43e054217da 100644 --- a/onnxruntime/test/testdata/lora/two_params_lora_model.py +++ b/onnxruntime/test/testdata/lora/two_params_lora_model.py @@ -140,6 +140,7 @@ def run_with_adapter(model_path: os.PathLike, adapter_path: os.PathLike): inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} outputs = session.run(None, inputs, run_options) + print(outputs) From 827b381511f8ebdbe6b6ad957c7a734491a0f081 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 20 Sep 2024 18:42:22 -0700 Subject: [PATCH 60/84] Add base model tests --- .../test/python/onnxruntime_test_python.py | 22 ++++++++++- onnxruntime/test/shared_lib/test_inference.cc | 39 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index b633976f4953b..ed1ac66fc8258 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1894,10 +1894,30 @@ def test_run_with_adapter(self): inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} outputs = session.run(None, inputs, run_options) - print("Run with adapter output:") self.assertEqual(len(outputs), 1) self.assertTrue(np.allclose(outputs[0], expected_output)) + def test_run_base_model(self): + model_path = get_name("lora/two_params_lora_model.onnx") + + expected_output = np.array( + [[28., 32., 36., 40.], + [28., 32., 36., 40.], + [28., 32., 36., 40.], + [28., 32., 36., 40.]], + dtype=np.float32, + ) + + run_options = onnxrt.RunOptions() + session 
= onnxrt.InferenceSession(model_path) + + inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} + + outputs = session.run(None, inputs, run_options) + self.assertEqual(len(outputs), 1) + self.assertTrue(np.allclose(outputs[0], expected_output)) + + if __name__ == "__main__": unittest.main(verbosity=1) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 9308075fe52a5..b66f773a7267f 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4448,6 +4448,45 @@ TEST(CApiTest, RunWithLoraAdapter) { } } +TEST(CApiTest, RunBaseLoraModel) { + constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model_optimized.onnx"); + Ort::Env env(ORT_LOGGING_LEVEL_WARNING); + constexpr const std::array input_shape = {4, 4}; + std::vector input_x(16); + std::fill(input_x.begin(), input_x.end(), 1.0f); + constexpr const char* input_names[] = {"input_x"}; + constexpr const char* output_names[] = {"output"}; + + auto cpu_meminfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); + + auto input_x_val = Ort::Value::CreateTensor( + cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size()); + + Ort::Value inputs[] = {std::move(input_x_val)}; + + Ort::SessionOptions default_session_options; + + constexpr const std::array expected_output = { + 28.f, 32.f, 36.f, 40.f, + 28.f, 32.f, 36.f, 40.f, + 28.f, 32.f, 36.f, 40.f, + 28.f, 32.f, 36.f, 40.f}; + + Ort::Session session(env, model_path, default_session_options); + + Ort::RunOptions run_options; + auto outputs = session.Run(run_options, input_names, inputs, std::size(input_names), output_names, std::size(output_names)); + ASSERT_EQ(1U, outputs.size()); + + auto tensor_type_shape = outputs[0].GetTensorTypeAndShapeInfo(); + const auto elements = tensor_type_shape.GetElementCount(); + ASSERT_EQ(expected_output.size(), elements); + const float* data = 
outputs[0].GetTensorData(); + for (size_t i = 0; i < elements; ++i) { + EXPECT_NEAR(expected_output[i], data[i], 0.06); + } +} + struct MockGQA : public OrtCustomOp { MockGQA() { OrtCustomOp::GetMayInplace = [](int** input_index, int** output_index) { From 6230f628255d28bd68219a5705e2185fdd726746 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 21 Sep 2024 18:37:47 -0700 Subject: [PATCH 61/84] Lint and fix up test model path --- onnxruntime/test/python/onnxruntime_test_python.py | 6 +----- onnxruntime/test/shared_lib/test_inference.cc | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index ed1ac66fc8258..3ce1956f1734b 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1901,10 +1901,7 @@ def test_run_base_model(self): model_path = get_name("lora/two_params_lora_model.onnx") expected_output = np.array( - [[28., 32., 36., 40.], - [28., 32., 36., 40.], - [28., 32., 36., 40.], - [28., 32., 36., 40.]], + [[28.0, 32.0, 36.0, 40.0], [28.0, 32.0, 36.0, 40.0], [28.0, 32.0, 36.0, 40.0], [28.0, 32.0, 36.0, 40.0]], dtype=np.float32, ) @@ -1918,6 +1915,5 @@ def test_run_base_model(self): self.assertTrue(np.allclose(outputs[0], expected_output)) - if __name__ == "__main__": unittest.main(verbosity=1) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index b66f773a7267f..d34348ee21b2b 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4449,7 +4449,7 @@ TEST(CApiTest, RunWithLoraAdapter) { } TEST(CApiTest, RunBaseLoraModel) { - constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model_optimized.onnx"); + constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx"); Ort::Env env(ORT_LOGGING_LEVEL_WARNING); 
constexpr const std::array input_shape = {4, 4}; std::vector input_x(16); From 4b90c877589f91f70dc8800985094c48b7ca4d9d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sun, 22 Sep 2024 14:27:43 -0700 Subject: [PATCH 62/84] Fix CPU only bug --- onnxruntime/lora/adapter_format_utils.cc | 2 +- onnxruntime/lora/lora_adapters.cc | 3 ++- onnxruntime/lora/lora_adapters.h | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 68aa10de46bb4..863a065f24955 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -121,7 +121,7 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter } // XXX: Figure out how to implement DML copy. -static void CopyOnDevice(const Tensor& src, Tensor& dst) { +static void CopyOnDevice([[maybe_unused]] const Tensor& src, Tensor& dst) { const auto& mem_info = dst.Location(); if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index b5a105b4dcc80..cb86cd4c56214 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -48,7 +48,7 @@ void LoraAdapter::InitializeParamsValues() { } const auto* params = adapter_->parameters(); - InlinedHashMap params_values; + std::unordered_map params_values; params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); @@ -58,6 +58,7 @@ void LoraAdapter::InitializeParamsValues() { params_values.emplace(std::move(name), std::move(lora_param)); } else { Param lora_param(std::move(ort_value)); + params_values.emplace(std::move(name), std::move(lora_param)); } } params_values_.swap(params_values); diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/lora/lora_adapters.h index 869fbf89ea756..4d6d7ff2a8344 100644 --- 
a/onnxruntime/lora/lora_adapters.h +++ b/onnxruntime/lora/lora_adapters.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace onnxruntime { namespace lora { @@ -64,8 +65,8 @@ class LoraAdapter { OrtValue ort_value_device_; }; - using param_const_iterator = InlinedHashMap::const_iterator; - using param_iterator = InlinedHashMap::iterator; + using param_const_iterator = std::unordered_map::const_iterator; + using param_iterator = std::unordered_map::iterator; /// /// Obtain a range of the iterators @@ -170,7 +171,7 @@ class LoraAdapter { AllocatorPtr device_allocator_; const adapters::Adapter* adapter_{nullptr}; - InlinedHashMap params_values_; + std::unordered_map params_values_; }; } // namespace lora From 38b3132d0313ec2646949875249e9b80621aa438 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 23 Sep 2024 09:35:00 -0700 Subject: [PATCH 63/84] Re-work ifdefs to avoid unreachable code warning --- onnxruntime/lora/adapter_format_utils.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 863a065f24955..7a9cf7d111c1d 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -120,29 +120,26 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter return std::make_pair(std::move(name), std::move(result)); } -// XXX: Figure out how to implement DML copy. -static void CopyOnDevice([[maybe_unused]] const Tensor& src, Tensor& dst) { +static Status CopyOnDevice([[maybe_unused]] const Tensor& src, Tensor& dst) { const auto& mem_info = dst.Location(); if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { #ifdef USE_CUDA auto ret = cudaMemcpy(dst.MutableDataRaw(), src.DataRaw(), src.SizeInBytes(), cudaMemcpyHostToDevice); - if (ret != cudaSuccess) { - ORT_THROW("cudaMemcpy failed. Return code: ", ret); - } + ORT_RETURN_IF_NOT(ret == cudaSuccess, "cudaMemcpy failed. 
Return code: ", ret); #else - ORT_NOT_IMPLEMENTED("Destination provider not available, copy failed"); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Destination provider: ", + mem_info.name, " not available, copy failed"); #endif - } else { - ORT_NOT_IMPLEMENTED("Destination device is currently not supported"); } + return Status::OK(); } OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { OrtValue result; const auto& src = ort_value_mapped.Get(); Tensor on_device(src.DataType(), src.Shape(), device_allocator); - CopyOnDevice(src, on_device); + ORT_THROW_IF_ERROR(CopyOnDevice(src, on_device)); Tensor::InitOrtValue(std::move(on_device), result); return result; } From bbdc9aef4a3d18e475e4f43e456ec6004882d979 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 23 Sep 2024 09:39:23 -0700 Subject: [PATCH 64/84] Add check for CPU destination --- onnxruntime/lora/adapter_format_utils.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 7a9cf7d111c1d..90865b81089c1 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -123,6 +123,8 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter static Status CopyOnDevice([[maybe_unused]] const Tensor& src, Tensor& dst) { const auto& mem_info = dst.Location(); + ORT_RETURN_IF(mem_info.device.Type() == OrtDevice::CPU, "Destination must not be on CPU"); + if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { #ifdef USE_CUDA auto ret = cudaMemcpy(dst.MutableDataRaw(), src.DataRaw(), src.SizeInBytes(), cudaMemcpyHostToDevice); From d6594c73d61a346db3139524f65d947e3cf770ac Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 23 Sep 2024 12:33:17 -0700 Subject: [PATCH 65/84] Address a regression --- onnxruntime/core/session/onnxruntime_c_api.cc | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git 
a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 69cc55d0a4a6f..c3f9c4ea42acb 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -857,17 +857,25 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu auto output_span = gsl::make_span(output, output_names_len); Status status; - if (run_options != nullptr && !run_options->active_adapters_.empty()) { - InlinedVector input_names_with_lora; - InlinedVector input_with_lora; - - CheckAndAdjustForLora(*run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); - - status = session->Run(*run_options, - input_names_span, - input_span, - output_name_span, - output_span); + if (run_options != nullptr) { + if (!run_options->active_adapters_.empty()) { + InlinedVector input_names_with_lora; + InlinedVector input_with_lora; + + CheckAndAdjustForLora(*run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); + + status = session->Run(*run_options, + input_names_span, + input_span, + output_name_span, + output_span); + } else { + status = session->Run(*run_options, + input_names_span, + input_span, + output_name_span, + output_span); + } } else { const RunOptions default_run_options; status = session->Run(default_run_options, From 35c0a599d0ff4851e55767c2cc87c4181e7daeaf Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 23 Sep 2024 14:48:22 -0700 Subject: [PATCH 66/84] Fix build warning --- onnxruntime/test/lora/lora_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 57bf51c59f64e..0769e152b6edf 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -108,7 +108,7 @@ auto verify_load = [](const lora::LoraAdapter& adapter) { ASSERT_NE(tensor.GetElementType(), 
ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); const auto shape = tensor.Shape().GetDims(); - ASSERT_EQ(2, shape.size()); + ASSERT_EQ(2U, shape.size()); ASSERT_EQ(8, shape[0]); ASSERT_EQ(4, shape[1]); From 34bb3a0587ae8c107d0d26acb48eb994cb8d11af Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 24 Sep 2024 11:12:18 -0700 Subject: [PATCH 67/84] Remove temp CopyOnDevice --- cmake/onnxruntime_lora.cmake | 4 --- onnxruntime/lora/adapter_format_utils.cc | 26 ------------------ onnxruntime/lora/adapter_format_utils.h | 9 ------ onnxruntime/lora/lora_adapters.cc | 10 ++----- onnxruntime/test/lora/lora_test.cc | 35 ------------------------ 5 files changed, 2 insertions(+), 82 deletions(-) diff --git a/cmake/onnxruntime_lora.cmake b/cmake/onnxruntime_lora.cmake index a2d5e4e2849cc..7ba48454d997e 100644 --- a/cmake/onnxruntime_lora.cmake +++ b/cmake/onnxruntime_lora.cmake @@ -13,10 +13,6 @@ onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs}) onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers Boost::mp11 ${GSL_TARGET}) target_link_libraries(onnxruntime_lora onnxruntime_framework) -if(onnxruntime_USE_CUDA) - target_link_libraries(onnxruntime_lora CUDA::cudart) -endif() - if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_lora PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index 90865b81089c1..d3da015d03369 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -120,32 +120,6 @@ std::pair CreateOrtValueOverLoraParameter(const Parameter return std::make_pair(std::move(name), std::move(result)); } -static Status CopyOnDevice([[maybe_unused]] const Tensor& src, Tensor& dst) { - const auto& mem_info = dst.Location(); - - ORT_RETURN_IF(mem_info.device.Type() == OrtDevice::CPU, "Destination must not be on CPU"); - - if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) 
{ -#ifdef USE_CUDA - auto ret = cudaMemcpy(dst.MutableDataRaw(), src.DataRaw(), src.SizeInBytes(), cudaMemcpyHostToDevice); - ORT_RETURN_IF_NOT(ret == cudaSuccess, "cudaMemcpy failed. Return code: ", ret); -#else - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Destination provider: ", - mem_info.name, " not available, copy failed"); -#endif - } - return Status::OK(); -} - -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { - OrtValue result; - const auto& src = ort_value_mapped.Get(); - Tensor on_device(src.DataType(), src.Shape(), device_allocator); - ORT_THROW_IF_ERROR(CopyOnDevice(src, on_device)); - Tensor::InitOrtValue(std::move(on_device), result); - return result; -} - void AdapterFormatBuilder::AddParameter(const std::string& name, TensorDataType data_type, gsl::span shape, gsl::span data) { flatbuffers::Offset fbs_param; diff --git a/onnxruntime/lora/adapter_format_utils.h b/onnxruntime/lora/adapter_format_utils.h index 922b88f8443c1..576872b86feaf 100644 --- a/onnxruntime/lora/adapter_format_utils.h +++ b/onnxruntime/lora/adapter_format_utils.h @@ -124,15 +124,6 @@ void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string /// /// std::pair CreateOrtValueOverLoraParameter(const Parameter& param); - -/// -/// Allocates OrtValue on specified device and copies data there -/// -/// parameter on CPU -/// supplied device allocator -/// -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); - } // namespace utils } // namespace adapters } // namespace onnxruntime diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/lora/lora_adapters.cc index cb86cd4c56214..a543ce7ac2f52 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/lora/lora_adapters.cc @@ -52,14 +52,8 @@ void LoraAdapter::InitializeParamsValues() { params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = 
adapters::utils::CreateOrtValueOverLoraParameter(*param); - if (device_allocator_) { - auto ort_value_ondevice = adapters::utils::CreateOrtValueOnDevice(ort_value, device_allocator_); - Param lora_param(std::move(ort_value), std::move(ort_value_ondevice)); - params_values.emplace(std::move(name), std::move(lora_param)); - } else { - Param lora_param(std::move(ort_value)); - params_values.emplace(std::move(name), std::move(lora_param)); - } + Param lora_param(std::move(ort_value)); + params_values.emplace(std::move(name), std::move(lora_param)); } params_values_.swap(params_values); } diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 0769e152b6edf..bab99b88dec8c 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -192,40 +192,5 @@ TEST(LoraAdapterTest, Load) { } } -#ifdef USE_CUDA -TEST(LoraAdapterTest, VerifyDeviceCopy) { - auto cpu_ep = DefaultCpuExecutionProvider(); - auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; - auto cuda_ep = DefaultCudaExecutionProvider(); - auto cuda_allocator = cuda_ep->CreatePreferredAllocators()[0]; - - auto gpu_transfer = cuda_ep->GetDataTransfer(); - - auto test_params = GenerateTestParameters()(); - lora::LoraAdapter adapter(std::move(cuda_allocator)); - adapter.Load(std::move(test_params)); - - auto [begin, end] = adapter.GetParamIterators(); - for (; begin != end; ++begin) { - const auto& [_, param] = *begin; - const auto& tensor_device = param.GetDeviceOrMapped().Get(); - ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::CUDA)); - - const auto& tensor_cpu = param.GetMapped().Get(); - ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size()); - - Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator); - ASSERT_TRUE(gpu_transfer->CanCopy(tensor_device.Location().device, - copy.Location().device)); - ASSERT_STATUS_OK(gpu_transfer->CopyTensor(tensor_device, copy)); - - auto expected_span = 
tensor_cpu.DataAsSpan(); - auto copy_span = copy.DataAsSpan(); - - ASSERT_EQ(expected_span, copy_span); - } -} -#endif - } // namespace test } // namespace onnxruntime From 0b569f616dcbb2e265de8fd970d2e6310f744818 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 24 Sep 2024 12:25:35 -0700 Subject: [PATCH 68/84] Remove stray include --- onnxruntime/lora/adapter_format_utils.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index d3da015d03369..f9570376a8386 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -12,10 +12,6 @@ #include "core/framework/ort_value.h" #include "core/framework/tensor.h" -#ifdef USE_CUDA -#include "cuda_runtime.h" -#endif - #include namespace onnxruntime { From 2e8f7dd5e123eb3d4dd4e1dc8faf54f21b0b6263 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 24 Sep 2024 13:43:59 -0700 Subject: [PATCH 69/84] Move lora_adapters files to core/session --- onnxruntime/core/framework/run_options.cc | 1 - onnxruntime/{lora => core/session}/lora_adapters.cc | 2 +- onnxruntime/{lora => core/session}/lora_adapters.h | 0 onnxruntime/core/session/onnxruntime_c_api.cc | 2 +- onnxruntime/python/onnxruntime_pybind_lora.cc | 2 +- onnxruntime/python/onnxruntime_pybind_state.cc | 2 +- onnxruntime/test/lora/lora_test.cc | 2 +- 7 files changed, 5 insertions(+), 6 deletions(-) rename onnxruntime/{lora => core/session}/lora_adapters.cc (98%) rename onnxruntime/{lora => core/session}/lora_adapters.h (100%) diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index b0ea7f7c9d843..743633f157930 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -2,7 +2,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "core/framework/run_options.h" -#include "lora/lora_adapters.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" diff --git a/onnxruntime/lora/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc similarity index 98% rename from onnxruntime/lora/lora_adapters.cc rename to onnxruntime/core/session/lora_adapters.cc index a543ce7ac2f52..59930f6528ed4 100644 --- a/onnxruntime/lora/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. #include "lora_adapters.h" -#include "adapter_format_utils.h" +#include "lora/adapter_format_utils.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/allocator_adapters.h" diff --git a/onnxruntime/lora/lora_adapters.h b/onnxruntime/core/session/lora_adapters.h similarity index 100% rename from onnxruntime/lora/lora_adapters.h rename to onnxruntime/core/session/lora_adapters.h diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index c3f9c4ea42acb..48fd3584ec0f7 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -39,7 +39,7 @@ #include "core/platform/ort_mutex.h" #include "core/common/string_helper.h" -#include "lora/lora_adapters.h" +#include "lora_adapters.h" #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index 099b05800905a..af8365418e5ea 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -16,7 +16,7 @@ #include "lora/adapter_format_version.h" #include "lora/adapter_format_utils.h" -#include "lora/lora_adapters.h" +#include "core/session/lora_adapters.h" #include diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc 
b/onnxruntime/python/onnxruntime_pybind_state.cc index 73d0de4288bf2..645d44011765c 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -32,7 +32,7 @@ #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/provider_bridge_ort.h" -#include "lora/lora_adapters.h" +#include "core/session/lora_adapters.h" #ifdef ENABLE_ATEN #include "contrib_ops/cpu/aten_ops/aten_op_executor.h" diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index bab99b88dec8c..c759dfe5fb0ca 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -7,7 +7,7 @@ #include "test/util/include/default_providers.h" -#include "lora/lora_adapters.h" +#include "core/session/lora_adapters.h" #include "lora/adapter_format_version.h" #include "lora/adapter_format_utils.h" From 2daa850785c459512e307accae23f4ed2d906c7e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 24 Sep 2024 17:22:29 -0700 Subject: [PATCH 70/84] Rework copy on device --- onnxruntime/core/session/lora_adapters.cc | 60 +++++++++++++++++++++-- onnxruntime/test/lora/lora_test.cc | 34 +++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index 59930f6528ed4..69e1fca702516 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -4,15 +4,25 @@ #include "lora_adapters.h" #include "lora/adapter_format_utils.h" +#include "core/framework/data_transfer.h" +#include "core/framework/error_code_helper.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/allocator_adapters.h" #include "core/session/ort_apis.h" -#include "core/framework/error_code_helper.h" #include #include +#include +#include + +#include "core/providers/cuda/cuda_provider_factory_creator.h" +#include "core/providers/cuda/cuda_provider_factory.h" 
+#include "core/providers/cuda/cuda_provider_options.h" namespace onnxruntime { + +ProviderInfo_CUDA* TryGetProviderInfo_CUDA(); + namespace lora { LoraAdapter::Param::Param(OrtValue ort_value_mapped) noexcept @@ -42,18 +52,62 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { InitializeParamsValues(); } +static Status GetDataTransfer(const OrtMemoryInfo& mem_info, + std::unique_ptr& data_transfer) { + ORT_RETURN_IF(mem_info.device.Type() == OrtDevice::CPU, "Destination must not be on CPU"); + if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { + auto* cuda_provider_info = TryGetProviderInfo_CUDA(); + if (cuda_provider_info != nullptr) { + data_transfer = cuda_provider_info->CreateGPUDataTransfer(); + } + } + + if (data_transfer == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Destination provider: ", + mem_info.name, " not available, copy failed"); + } + + return Status::OK(); +} + +static Status CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, + const AllocatorPtr& device_allocator, + const IDataTransfer& data_transfer, + OrtValue& out) { + OrtValue result; + const auto& src = ort_value_mapped.Get(); + Tensor on_device(src.DataType(), src.Shape(), device_allocator); + ORT_RETURN_IF_ERROR(data_transfer.CopyTensor(src, on_device)); + Tensor::InitOrtValue(std::move(on_device), result); + out = std::move(result); + return Status::OK(); +} + void LoraAdapter::InitializeParamsValues() { if (adapter_ == nullptr) { ORT_THROW("Adapter is not loaded yet."); } + std::unique_ptr data_transfer; + if (device_allocator_) { + ORT_THROW_IF_ERROR(GetDataTransfer(device_allocator_->Info(), data_transfer)); + } + const auto* params = adapter_->parameters(); std::unordered_map params_values; params_values.reserve(params->size()); for (const auto* param : *params) { auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); - Param lora_param(std::move(ort_value)); - params_values.emplace(std::move(name), 
std::move(lora_param)); + if (device_allocator_) { + OrtValue ort_value_ondevice; + ORT_THROW_IF_ERROR(CreateOrtValueOnDevice(ort_value, device_allocator_, + *data_transfer, ort_value_ondevice)); + Param lora_param(std::move(ort_value), std::move(ort_value_ondevice)); + params_values.emplace(std::move(name), std::move(lora_param)); + } else { + Param lora_param(std::move(ort_value)); + params_values.emplace(std::move(name), std::move(lora_param)); + } } params_values_.swap(params_values); } diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index c759dfe5fb0ca..c2aede3e00875 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -192,5 +192,39 @@ TEST(LoraAdapterTest, Load) { } } +#ifdef USE_CUDA +TEST(LoraAdapterTest, VerifyDeviceCopy) { + auto cpu_ep = DefaultCpuExecutionProvider(); + auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; + auto cuda_ep = DefaultCudaExecutionProvider(); + auto cuda_allocator = cuda_ep->CreatePreferredAllocators()[0]; + + auto gpu_transfer = cuda_ep->GetDataTransfer(); + + auto test_params = GenerateTestParameters()(); + lora::LoraAdapter adapter(std::move(cuda_allocator)); + adapter.Load(std::move(test_params)); + + auto [begin, end] = adapter.GetParamIterators(); + for (; begin != end; ++begin) { + const auto& [_, param] = *begin; + const auto& tensor_device = param.GetDeviceOrMapped().Get(); + ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::CUDA)); + + const auto& tensor_cpu = param.GetMapped().Get(); + ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size()); + + Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator); + ASSERT_TRUE(gpu_transfer->CanCopy(tensor_device.Location().device, + copy.Location().device)); + ASSERT_STATUS_OK(gpu_transfer->CopyTensor(tensor_device, copy)); + + auto expected_span = tensor_cpu.DataAsSpan(); + auto copy_span = copy.DataAsSpan(); + + ASSERT_EQ(expected_span, copy_span); + } 
+} +#endif } // namespace test } // namespace onnxruntime From 90b0197c7c191679636ea4527f45df0199af0e41 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 25 Sep 2024 12:38:51 -0700 Subject: [PATCH 71/84] Address review comments --- include/onnxruntime/core/framework/run_options.h | 2 +- .../onnxruntime/core/session/onnxruntime_c_api.h | 10 +++++----- .../onnxruntime/core/session/onnxruntime_cxx_api.h | 4 ++-- .../core/session/onnxruntime_cxx_inline.h | 4 ++-- onnxruntime/core/framework/run_options.cc | 4 ++-- onnxruntime/core/session/lora_adapters.cc | 14 +++++++------- onnxruntime/core/session/onnxruntime_c_api.cc | 8 ++++---- onnxruntime/core/session/ort_apis.h | 4 ++-- onnxruntime/lora/adapter_format/README.md | 4 ++-- onnxruntime/python/onnxruntime_pybind_state.cc | 8 ++++---- onnxruntime/test/shared_lib/test_inference.cc | 2 +- .../test/testdata/lora/two_params_lora_model.py | 3 +++ 12 files changed, 35 insertions(+), 32 deletions(-) diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index ffe5c61f506c0..fab65e8fee692 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -48,7 +48,7 @@ struct OrtRunOptions { // /include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h onnxruntime::ConfigOptions config_options; - onnxruntime::InlinedVector active_adapters_; + onnxruntime::InlinedVector active_adapters; OrtRunOptions() = default; ~OrtRunOptions() = default; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 9b9ac819efb18..1a209b965baf5 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4681,7 +4681,7 @@ struct OrtApi { * * \param[in] adapter_file_path Absolute file path to the adapter file. * \param[in] allocator optional pointer to a device allocator. 
If specified - * data is copied to the device at some point before Run() is invoked, if nullptr data stays on CPU. + * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU. * The data would still be copied to device if required by the model at inference time. * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with * OrtApi::ReleaseLoraAdapter. @@ -4695,17 +4695,17 @@ struct OrtApi { /** \brief Set the active Lora Adapter for the run options * - * The function sets the active Lora Adapter for the run options. The Lora Adapter must be created with + * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with * OrtApi::CreateLoraAdapter. The Lora Adapter will be used by the session to run the model. - * The instance of the OrtRunOptions will then can be used to customize the OrtSession::Run() calls. - * More than one OrtLoraAdapter can be set active at the same time. Lora Parameters that belong to difference + * The instance of the OrtRunOptions will can then be then used to customize the Run() calls. + * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different * Lora adapters that will be active at the same time must not overlap. * This setting does not affect RunWithBinding. 
* * \param[in] options OrtRunOptions instance * \param[in] adapter OrtLoraAdapter instance */ - ORT_API2_STATUS(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); + ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 4934ff97a857b..07320a66bc55e 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -779,13 +779,13 @@ struct RunOptions : detail::Base { */ RunOptions& UnsetTerminate(); - /** \brief Designates the argument as an active adapter for the session Run() calls. + /** \brief Add the LoraAdapter to the list of active adapters. * The setting does not affect RunWithBinding() calls. * * Wraps OrtApi::RunOptionsSetLoraAdapterActive * \param adapter The LoraAdapter to be used as the active adapter */ - RunOptions& SetLoraAdapterActive(const LoraAdapter& adapter); + RunOptions& AddLoraAdapterActive(const LoraAdapter& adapter); }; namespace detail { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 805a2553a0530..019b998aafd80 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -613,8 +613,8 @@ inline RunOptions& RunOptions::UnsetTerminate() { return *this; } -inline RunOptions& RunOptions::SetLoraAdapterActive(const LoraAdapter& adapter) { - ThrowOnError(GetApi().RunOptionsSetActiveLoraAdapter(p_, adapter)); +inline RunOptions& RunOptions::AddLoraAdapterActive(const LoraAdapter& adapter) { + ThrowOnError(GetApi().RunOptionsAddActiveLoraAdapter(p_, adapter)); return *this; } diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc 
index 743633f157930..cb07cc22b1b2f 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -63,11 +63,11 @@ ORT_API_STATUS_IMPL(OrtApis::AddRunConfigEntry, _Inout_ OrtRunOptions* options, return onnxruntime::ToOrtStatus(options->config_options.AddConfigEntry(config_key, config_value)); } -ORT_API_STATUS_IMPL(OrtApis::RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, +ORT_API_STATUS_IMPL(OrtApis::RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, const _In_ OrtLoraAdapter* adapter) { API_IMPL_BEGIN auto* lora_adapter = reinterpret_cast(adapter); - options->active_adapters_.push_back(lora_adapter); + options->active_adapters.push_back(lora_adapter); return nullptr; API_IMPL_END } diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index 69e1fca702516..f090b510a8e3d 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -10,18 +10,16 @@ #include "core/session/allocator_adapters.h" #include "core/session/ort_apis.h" -#include -#include -#include -#include - -#include "core/providers/cuda/cuda_provider_factory_creator.h" +#ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" -#include "core/providers/cuda/cuda_provider_options.h" +#endif +#include namespace onnxruntime { +#ifdef USE_CUDA ProviderInfo_CUDA* TryGetProviderInfo_CUDA(); +#endif namespace lora { @@ -56,10 +54,12 @@ static Status GetDataTransfer(const OrtMemoryInfo& mem_info, std::unique_ptr& data_transfer) { ORT_RETURN_IF(mem_info.device.Type() == OrtDevice::CPU, "Destination must not be on CPU"); if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { +#ifdef USE_CUDA auto* cuda_provider_info = TryGetProviderInfo_CUDA(); if (cuda_provider_info != nullptr) { data_transfer = cuda_provider_info->CreateGPUDataTransfer(); } +#endif } if (data_transfer == nullptr) { diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc 
b/onnxruntime/core/session/onnxruntime_c_api.cc index 48fd3584ec0f7..fbc879ce9637a 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -823,7 +823,7 @@ void CheckAndAdjustForLora(const OrtRunOptions& run_options, gsl::span& input_names, gsl::span& inputs) { size_t total_lora_params = 0; - for (const lora::LoraAdapter* ad : run_options.active_adapters_) { + for (const lora::LoraAdapter* ad : run_options.active_adapters) { total_lora_params += ad->GetParamNum(); } @@ -832,7 +832,7 @@ void CheckAndAdjustForLora(const OrtRunOptions& run_options, std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); - for (const lora::LoraAdapter* ad : run_options.active_adapters_) { + for (const lora::LoraAdapter* ad : run_options.active_adapters) { ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), std::back_inserter(input_with_lora)); } @@ -858,7 +858,7 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu Status status; if (run_options != nullptr) { - if (!run_options->active_adapters_.empty()) { + if (!run_options->active_adapters.empty()) { InlinedVector input_names_with_lora; InlinedVector input_with_lora; @@ -2775,7 +2775,7 @@ static constexpr OrtApi ort_api_1_to_20 = { // End of Version 18 - DO NOT MODIFY ABOVE (see above text for more information) &OrtApis::CreateLoraAdapter, &OrtApis::ReleaseLoraAdapter, - &OrtApis::RunOptionsSetActiveLoraAdapter, + &OrtApis::RunOptionsAddActiveLoraAdapter, }; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. 
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 210ef607c053e..8c096f1ec63f8 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -118,7 +118,7 @@ ORT_API_STATUS_IMPL(RunOptionsGetRunTag, _In_ const OrtRunOptions*, _Out_ const ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); -ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); +ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type, @@ -528,6 +528,6 @@ ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); -ORT_API_STATUS_IMPL(RunOptionsSetActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); +ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); } // namespace OrtApis diff --git a/onnxruntime/lora/adapter_format/README.md b/onnxruntime/lora/adapter_format/README.md index d28f47186cbea..2bb107584d9ff 100644 --- a/onnxruntime/lora/adapter_format/README.md +++ b/onnxruntime/lora/adapter_format/README.md @@ -1,8 +1,8 @@ # Lora Parameters Flatbuffer Schemas This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the -Lora Parameters file format. This file format is defined as means to deliver Lora parameters so it can read by ONNXRuntime C++ code. 
+Lora Parameters file format. This file format is defined as a means to deliver Lora parameters so it can read by ONNXRuntime C++ code. -The format format is generally designed to house a single Lora adapter named Lora parameters. +The format is generally designed to house a single Lora adapter named Lora parameters. [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library. diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 645d44011765c..66c912c8f24ea 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1906,7 +1906,7 @@ RunOptions instance. The individual calls will exit gracefully and return an err R"pbdoc(Get a single run configuration value using the given configuration key.)pbdoc") .def( "set_adapter_active", [](RunOptions* options, lora::LoraAdapter* adapter) { - options->active_adapters_.push_back(adapter); + options->active_adapters.push_back(adapter); }, R"pbdoc(Activates the specified lora adapter)pbdoc"); @@ -2029,15 +2029,15 @@ including arg name, arg type (contains both type and shape).)pbdoc") const std::map& pyfeeds, RunOptions* run_options = nullptr) -> py::list { NameMLValMap feeds; - if (run_options != nullptr && !run_options->active_adapters_.empty()) { + if (run_options != nullptr && !run_options->active_adapters.empty()) { size_t total_entries = pyfeeds.size(); - for (const auto* adapter : run_options->active_adapters_) { + for (const auto* adapter : run_options->active_adapters) { total_entries += adapter->GetParamNum(); } feeds.reserve(total_entries); // Append necessary inputs for active adapters - for (const auto* adapter : run_options->active_adapters_) { + for (const auto* adapter : run_options->active_adapters) { auto [begin, end] = adapter->GetParamIterators(); for (; begin != end; ++begin) { const auto& [name, 
param] = *begin; diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index d34348ee21b2b..354aa3bfde804 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4410,7 +4410,7 @@ TEST(CApiTest, RunWithLoraAdapter) { Ort::LoraAdapter adapter(adapter_path, nullptr); Ort::RunOptions run_options; - run_options.SetLoraAdapterActive(adapter); + run_options.AddLoraAdapterActive(adapter); // Single input constexpr const std::array input_shape = {4, 4}; diff --git a/onnxruntime/test/testdata/lora/two_params_lora_model.py b/onnxruntime/test/testdata/lora/two_params_lora_model.py index 8f43e054217da..12706ad71e82e 100644 --- a/onnxruntime/test/testdata/lora/two_params_lora_model.py +++ b/onnxruntime/test/testdata/lora/two_params_lora_model.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + import os import numpy as np From 43bf431fc6328f751d93c7b17d3d3a3b64340b6a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 25 Sep 2024 14:11:06 -0700 Subject: [PATCH 72/84] Add pybind registration at training for Lora and remove session lora linkage --- cmake/onnxruntime_session.cmake | 2 +- orttraining/orttraining/python/orttraining_python_module.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index 47cf2dfc5e7aa..c4011813e1544 100644 --- a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -31,7 +31,7 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnxruntime_lora onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) 
-target_link_libraries(onnxruntime_session PRIVATE onnxruntime_lora) + if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/orttraining/orttraining/python/orttraining_python_module.cc b/orttraining/orttraining/python/orttraining_python_module.cc index b0d1ed50af126..7ec924b6d9bb4 100644 --- a/orttraining/orttraining/python/orttraining_python_module.cc +++ b/orttraining/orttraining/python/orttraining_python_module.cc @@ -317,6 +317,7 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) { addOrtValueMethods(m); addSparseTensorMethods(m); addIoBindingMethods(m); + addAdapterFormatMethods(m); #if !defined(__APPLE__) && \ (!defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)) From d3ed0f55a6573db82ce2eb5791c69526cc61b584 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 25 Sep 2024 14:35:56 -0700 Subject: [PATCH 73/84] Restore linkage to lora --- cmake/onnxruntime_session.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index c4011813e1544..47cf2dfc5e7aa 100644 --- a/cmake/onnxruntime_session.cmake +++ b/cmake/onnxruntime_session.cmake @@ -31,7 +31,7 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnxruntime_lora onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json) - +target_link_libraries(onnxruntime_session PRIVATE onnxruntime_lora) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) endif() From 8076dba072a1b9fce2a3ad15bfa312b37fd6d58a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 25 Sep 2024 14:41:55 
-0700 Subject: [PATCH 74/84] Restore training linkage to lora --- cmake/onnxruntime_training.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_training.cmake b/cmake/onnxruntime_training.cmake index b633a9c2de378..f1a5f908eb245 100644 --- a/cmake/onnxruntime_training.cmake +++ b/cmake/onnxruntime_training.cmake @@ -139,7 +139,7 @@ if (onnxruntime_BUILD_UNIT_TESTS) target_compile_options(onnxruntime_training_mnist PUBLIC "-Wno-maybe-uninitialized") endif() endif() - target_link_libraries(onnxruntime_training_mnist PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) + target_link_libraries(onnxruntime_training_mnist PRIVATE onnxruntime_training_runner onnxruntime_lora onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) set_target_properties(onnxruntime_training_mnist PROPERTIES FOLDER "ONNXRuntimeTest") # squeezenet From 9727cbe8626a69ac4aaff40fc16a8c7ede66e2ae Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 25 Sep 2024 17:39:07 -0700 Subject: [PATCH 75/84] Avoid ORT_RETURN_IF_ERROR, fails on DNNL --- onnxruntime/core/session/lora_adapters.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index f090b510a8e3d..d62d0dc98d429 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -62,10 +62,8 @@ static Status GetDataTransfer(const OrtMemoryInfo& mem_info, #endif } - if (data_transfer == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Destination provider: ", - mem_info.name, " not available, copy failed"); - } + ORT_RETURN_IF(data_transfer == nullptr, "Destination memory device: ", + mem_info.name, " not available, copy failed"); return Status::OK(); } @@ -77,7 +75,12 @@ static Status CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, OrtValue result; const 
auto& src = ort_value_mapped.Get(); Tensor on_device(src.DataType(), src.Shape(), device_allocator); - ORT_RETURN_IF_ERROR(data_transfer.CopyTensor(src, on_device)); + { + auto status = data_transfer.CopyTensor(src, on_device); + if (!status.IsOK()) { + return status; + } + } Tensor::InitOrtValue(std::move(on_device), result); out = std::move(result); return Status::OK(); From f6a840492c9bc3a246b37b66ebdc8bf7933df9c5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 26 Sep 2024 12:13:18 -0700 Subject: [PATCH 76/84] Address review comments --- .../core/session/onnxruntime_cxx_api.h | 2 +- .../core/session/onnxruntime_cxx_inline.h | 2 +- onnxruntime/core/session/lora_adapters.cc | 14 ++------- onnxruntime/core/session/lora_adapters.h | 2 -- onnxruntime/core/session/onnxruntime_c_api.cc | 29 ++++++++++++------- onnxruntime/lora/adapter_format_version.h | 4 +-- onnxruntime/test/shared_lib/test_inference.cc | 2 +- 7 files changed, 26 insertions(+), 29 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 07320a66bc55e..e1bb072c8a096 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -785,7 +785,7 @@ struct RunOptions : detail::Base { * Wraps OrtApi::RunOptionsSetLoraAdapterActive * \param adapter The LoraAdapter to be used as the active adapter */ - RunOptions& AddLoraAdapterActive(const LoraAdapter& adapter); + RunOptions& AddActiveLoraAdapter(const LoraAdapter& adapter); }; namespace detail { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 019b998aafd80..0ca580010e9ba 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -613,7 +613,7 @@ inline RunOptions& RunOptions::UnsetTerminate() { return *this; } -inline RunOptions& 
RunOptions::AddLoraAdapterActive(const LoraAdapter& adapter) { +inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter) { ThrowOnError(GetApi().RunOptionsAddActiveLoraAdapter(p_, adapter)); return *this; } diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index d62d0dc98d429..f33e9047d5f5a 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -1,9 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "lora_adapters.h" +#include "core/session/lora_adapters.h" #include "lora/adapter_format_utils.h" +#include + #include "core/framework/data_transfer.h" #include "core/framework/error_code_helper.h" #include "core/session/onnxruntime_c_api.h" @@ -13,7 +15,6 @@ #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" #endif -#include namespace onnxruntime { @@ -115,15 +116,6 @@ void LoraAdapter::InitializeParamsValues() { params_values_.swap(params_values); } -size_t LoraAdapter::GetBufferSize() const { - if (std::holds_alternative(buffer_)) { - return std::get<1>(buffer_).file_size_; - } else if (std::holds_alternative(buffer_)) { - return std::get<2>(buffer_).buffer_.size(); - } - ORT_THROW("Non-exhaustive visitor for BinaryFormatHolder::GetSize()"); -} - } // namespace lora } // namespace onnxruntime diff --git a/onnxruntime/core/session/lora_adapters.h b/onnxruntime/core/session/lora_adapters.h index 4d6d7ff2a8344..77534b2bb7d15 100644 --- a/onnxruntime/core/session/lora_adapters.h +++ b/onnxruntime/core/session/lora_adapters.h @@ -152,8 +152,6 @@ class LoraAdapter { private: void InitializeParamsValues(); - // Get the size of the buffer - size_t GetBufferSize() const; struct BufferHolder { explicit BufferHolder(std::vector buffer) : buffer_(std::move(buffer)) {} diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc 
index fbc879ce9637a..404792afce88f 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -817,28 +817,28 @@ ORT_API_STATUS_IMPL(OrtApis::CreateSessionFromArray, _In_ const OrtEnv* env, _In namespace { // Checks if there are active lora adapters and adjusts input spans. -void CheckAndAdjustForLora(const OrtRunOptions& run_options, - InlinedVector& input_names_with_lora, - InlinedVector& input_with_lora, - gsl::span& input_names, - gsl::span& inputs) { +void CheckAndAdjustInputSpansForLora(const OrtRunOptions& run_options, + InlinedVector& input_names_with_lora, + InlinedVector& inputs_with_lora, + gsl::span& input_names, + gsl::span& inputs) { size_t total_lora_params = 0; for (const lora::LoraAdapter* ad : run_options.active_adapters) { total_lora_params += ad->GetParamNum(); } input_names_with_lora.reserve(input_names.size() + total_lora_params); - input_with_lora.reserve(inputs.size() + total_lora_params); + inputs_with_lora.reserve(inputs.size() + total_lora_params); std::copy(input_names.begin(), input_names.end(), std::back_inserter(input_names_with_lora)); - std::copy(inputs.begin(), inputs.end(), std::back_inserter(input_with_lora)); + std::copy(inputs.begin(), inputs.end(), std::back_inserter(inputs_with_lora)); for (const lora::LoraAdapter* ad : run_options.active_adapters) { ad->OutputAdapterParameters(std::back_inserter(input_names_with_lora), - std::back_inserter(input_with_lora)); + std::back_inserter(inputs_with_lora)); } input_names = gsl::make_span(input_names_with_lora); - inputs = gsl::make_span(input_with_lora); + inputs = gsl::make_span(inputs_with_lora); } } // namespace @@ -862,7 +862,7 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu InlinedVector input_names_with_lora; InlinedVector input_with_lora; - CheckAndAdjustForLora(*run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); + 
CheckAndAdjustInputSpansForLora(*run_options, input_names_with_lora, input_with_lora, input_names_span, input_span); status = session->Run(*run_options, input_names_span, @@ -897,6 +897,10 @@ ORT_API_STATUS_IMPL(OrtApis::RunAsync, _Inout_ OrtSession* sess, _In_opt_ const API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); + if (run_options != nullptr && !run_options->active_adapters.empty()) { + LOGS(*session->GetLogger(), WARNING) << "RunAsync() active adapters specified, but won't have an effect"; + } + auto input_names_span = gsl::make_span(input_names, input_len); auto input_span = gsl::make_span(input, input_len); auto output_name_span = gsl::make_span(output_names, output_names_len); @@ -928,6 +932,9 @@ ORT_API_STATUS_IMPL(OrtApis::RunWithBinding, _Inout_ OrtSession* sess, _In_ cons OrtRunOptions default_run_options; status = session->Run(default_run_options, *binding_ptr->binding_); } else { + if (!run_options->active_adapters.empty()) { + LOGS(*session->GetLogger(), WARNING) << "RunWithBinding() active adapters specified, but won't have effect"; + } status = session->Run(*run_options, *binding_ptr->binding_); } if (!status.IsOK()) { @@ -2832,7 +2839,7 @@ ORT_API(const char*, OrtApis::GetVersionString) { return ORT_VERSION; } -const char* ORT_API_CALL OrtApis::GetBuildInfoString() noexcept { +ORT_API(const char*, OrtApis::GetBuildInfoString) { return ORT_BUILD_INFO; } diff --git a/onnxruntime/lora/adapter_format_version.h b/onnxruntime/lora/adapter_format_version.h index d653492c6db46..73b1ba34b31f2 100644 --- a/onnxruntime/lora/adapter_format_version.h +++ b/onnxruntime/lora/adapter_format_version.h @@ -9,8 +9,8 @@ namespace onnxruntime { namespace adapters { -// The current model versions for saving lora parameters in flatbuffers -// Once this version is updated, the kSupportedAdapterFormatVersions in IsGenAiLoraFormatModelBytes +// The current model versions for saving lora parameters in flatbuffers format. 
+// Once this version is updated, the kSupportedAdapterFormatVersions in IsAdapterFormatVersionSupported // below will also need to be updated. // See src/flatbuffers/schema/README.md for more details on versioning. // Version 1 - history begins diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 354aa3bfde804..8f955e432efe0 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4410,7 +4410,7 @@ TEST(CApiTest, RunWithLoraAdapter) { Ort::LoraAdapter adapter(adapter_path, nullptr); Ort::RunOptions run_options; - run_options.AddLoraAdapterActive(adapter); + run_options.AddActiveLoraAdapter(adapter); // Single input constexpr const std::array input_shape = {4, 4}; From 56c7e27760332dfc2770d305b9113c48ce5ec48a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 27 Sep 2024 12:11:30 -0700 Subject: [PATCH 77/84] Add CreateLoraAdapterFromArray public API --- .../core/session/onnxruntime_c_api.h | 18 +++++- .../core/session/onnxruntime_cxx_api.h | 17 ++++- .../core/session/onnxruntime_cxx_inline.h | 14 ++++- onnxruntime/core/session/lora_adapters.cc | 62 +++++++++++++------ onnxruntime/core/session/onnxruntime_c_api.cc | 1 + onnxruntime/core/session/ort_apis.h | 2 + onnxruntime/test/shared_lib/test_inference.cc | 35 ++++++++++- 7 files changed, 124 insertions(+), 25 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 1a209b965baf5..c24f29fd9dcb1 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4689,6 +4689,22 @@ struct OrtApi { ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); + /** \brief Create an OrtLoraAdapter + * + * The function copies the bytes from the array and creates an OrtLoraAdapter 
instance. + * + * + * \param[in] bytes pointer to a valid Lora Adapter format buffer. + * \param[in] num_bytes length of bytes buffer. + * \param[in] allocator optional pointer to a device allocator. If specified + * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU. + * The data would still be copied to device if required by the model at inference time. + * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with + * OrtApi::ReleaseLoraAdapter. + */ + ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); + /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter */ ORT_CLASS_RELEASE(LoraAdapter); @@ -4696,7 +4712,7 @@ struct OrtApi { /** \brief Set the active Lora Adapter for the run options * * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with - * OrtApi::CreateLoraAdapter. The Lora Adapter will be used by the session to run the model. + * OrtApi::CreateLoraAdapter or FromArray. The Lora Adapter will be used by the session to run the model. * The instance of the OrtRunOptions will can then be then used to customize the Run() calls. * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different * Lora adapters that will be active at the same time must not overlap. 
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index e1bb072c8a096..80aa4550123f7 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -739,13 +739,28 @@ struct CustomOpDomain : detail::Base { /// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file struct LoraAdapter : detail::Base { + using Base = detail::Base; + using Base::Base; + + explicit LoraAdapter(std::nullptr_t) {} ///< Create an empty LoraAdapter object, must be assigned a valid one to be used /// \brief Wraps OrtApi::CreateLoraAdapter /// /// The function attempts to load the adapter from the specified file /// \param absolute_adapter_path The absolute path to the Lora adapter /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still /// be copied to device if required by the model at inference time. - explicit LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator); + static LoraAdapter CreateLoraAdapter(const std::basic_string& absolute_adapter_path, + OrtAllocator* allocator); + + /// \brief Wraps OrtApi::CreateLoraAdapterFromArray + /// + /// The function attempts to load the adapter from the specified byte array. + /// \param bytes The byte array containing file LoraAdapter format + /// \param num_bytes The number of bytes in the byte array + /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still + /// be copied to device if required by the model at inference time. 
+ static LoraAdapter CreateLoraAdapterFromArray(const uint8_t* bytes, size_t num_bytes, + OrtAllocator* allocator); }; /** \brief RunOptions diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 0ca580010e9ba..c909b3c1dc85a 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,8 +557,18 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } -inline LoraAdapter::LoraAdapter(const std::basic_string& absolute_adapter_path, OrtAllocator* allocator) { - ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), allocator, &p_)); +inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string& absolute_adapter_path, + OrtAllocator* allocator) { + OrtLoraAdapter* p; + ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), allocator, &p)); + return LoraAdapter{p}; +} + +inline LoraAdapter LoraAdapter::CreateLoraAdapterFromArray(const uint8_t* bytes, size_t num_bytes, + OrtAllocator* allocator) { + OrtLoraAdapter* p; + GetApi().CreateLoraAdapterFromArray(bytes, num_bytes, allocator, &p); + return LoraAdapter{p}; } inline RunOptions::RunOptions() { diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index f33e9047d5f5a..3806eb3f14586 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -51,9 +51,13 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { InitializeParamsValues(); } -static Status GetDataTransfer(const OrtMemoryInfo& mem_info, - std::unique_ptr& data_transfer) { - ORT_RETURN_IF(mem_info.device.Type() == OrtDevice::CPU, "Destination must not be on CPU"); +static std::unique_ptr GetDataTransfer(const OrtMemoryInfo& mem_info) { + std::unique_ptr data_transfer; + + if 
(strcmp(mem_info.name, onnxruntime::CPU) == 0) { + return data_transfer; + } + if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { #ifdef USE_CUDA auto* cuda_provider_info = TryGetProviderInfo_CUDA(); @@ -63,10 +67,7 @@ static Status GetDataTransfer(const OrtMemoryInfo& mem_info, #endif } - ORT_RETURN_IF(data_transfer == nullptr, "Destination memory device: ", - mem_info.name, " not available, copy failed"); - - return Status::OK(); + return data_transfer; } static Status CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, @@ -76,12 +77,7 @@ static Status CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, OrtValue result; const auto& src = ort_value_mapped.Get(); Tensor on_device(src.DataType(), src.Shape(), device_allocator); - { - auto status = data_transfer.CopyTensor(src, on_device); - if (!status.IsOK()) { - return status; - } - } + ORT_RETURN_IF_ERROR(data_transfer.CopyTensor(src, on_device)); Tensor::InitOrtValue(std::move(on_device), result); out = std::move(result); return Status::OK(); @@ -94,25 +90,33 @@ void LoraAdapter::InitializeParamsValues() { std::unique_ptr data_transfer; if (device_allocator_) { - ORT_THROW_IF_ERROR(GetDataTransfer(device_allocator_->Info(), data_transfer)); + data_transfer = GetDataTransfer(device_allocator_->Info()); + if (data_transfer == nullptr) { + ORT_THROW("Data transfer is not available for the specified device allocator, it also must not be a CPU allocator"); + } } const auto* params = adapter_->parameters(); std::unordered_map params_values; params_values.reserve(params->size()); - for (const auto* param : *params) { - auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); - if (device_allocator_) { + // Re-work in two separate loops due to compiler issues + if (data_transfer) { + for (const auto* param : *params) { + auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); OrtValue ort_value_ondevice; ORT_THROW_IF_ERROR(CreateOrtValueOnDevice(ort_value, 
device_allocator_, *data_transfer, ort_value_ondevice)); Param lora_param(std::move(ort_value), std::move(ort_value_ondevice)); params_values.emplace(std::move(name), std::move(lora_param)); - } else { + } + } else { + for (const auto* param : *params) { + auto [name, ort_value] = adapters::utils::CreateOrtValueOverLoraParameter(*param); Param lora_param(std::move(ort_value)); params_values.emplace(std::move(name), std::move(lora_param)); } } + params_values_.swap(params_values); } @@ -137,6 +141,28 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_fi API_IMPL_END } +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, const uint8_t* bytes, size_t num_bytes, + _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** adapter) { + API_IMPL_BEGIN + + std::unique_ptr lora_adapter; + if (allocator != nullptr) { + auto alloc_ptr = std::make_shared(allocator); + lora_adapter = std::make_unique(std::move(alloc_ptr)); + } else { + lora_adapter = std::make_unique(); + } + + auto span = gsl::make_span(bytes, num_bytes); + std::vector buffer; + buffer.reserve(num_bytes); + buffer.assign(span.begin(), span.end()); + lora_adapter->Load(std::move(buffer)); + *adapter = reinterpret_cast(lora_adapter.release()); + return nullptr; + API_IMPL_END +} + ORT_API(void, OrtApis::ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter* adapter) { delete reinterpret_cast(adapter); } diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 404792afce88f..a490cb8cb1a97 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2781,6 +2781,7 @@ static constexpr OrtApi ort_api_1_to_20 = { &OrtApis::AddExternalInitializersFromFilesInMemory, // End of Version 18 - DO NOT MODIFY ABOVE (see above text for more information) &OrtApis::CreateLoraAdapter, + &OrtApis::CreateLoraAdapterFromArray, &OrtApis::ReleaseLoraAdapter, 
&OrtApis::RunOptionsAddActiveLoraAdapter, }; diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 8c096f1ec63f8..ab7d463819712 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -527,6 +527,8 @@ ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); +ORT_API_STATUS_IMPL(CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, + _Outptr_ OrtLoraAdapter** out); ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*); ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 8f955e432efe0..b36a2bfe94cd2 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4402,13 +4402,11 @@ TEST(CApiTest, RunAsyncFail) { EXPECT_THROW(session.RunAsync(run_options, input_names, input_tensors, 1, output_names, output_values, 1, CallbackFail, nullptr), std::exception); } -TEST(CApiTest, RunWithLoraAdapter) { +static void TestRunWithLoraAdapter(const Ort::LoraAdapter& adapter) { constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx"); - constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); Ort::Env env(ORT_LOGGING_LEVEL_WARNING); - Ort::LoraAdapter adapter(adapter_path, nullptr); Ort::RunOptions run_options; run_options.AddActiveLoraAdapter(adapter); @@ -4448,6 +4446,37 @@ TEST(CApiTest, RunWithLoraAdapter) { } } +static Ort::LoraAdapter CreateAdapterFromFile() { + constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); + return 
Ort::LoraAdapter::CreateLoraAdapter(adapter_path, nullptr); +} + +static Ort::LoraAdapter CreateAdapterFromArray() { + constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); + std::ifstream adapter_file(adapter_path, std::ios::binary); + + EXPECT_TRUE(adapter_file.is_open()); + adapter_file.seekg(0, std::ios::end); + const size_t adapter_size = adapter_file.tellg(); + + std::vector buffer(adapter_size); + adapter_file.seekg(0, std::ios::beg); + adapter_file.read(reinterpret_cast(buffer.data()), adapter_size); + adapter_file.close(); + + return Ort::LoraAdapter::CreateLoraAdapterFromArray(buffer.data(), buffer.size(), nullptr); +} + +TEST(CApi, RunWithLoraAdapterFromFile) { + auto adapter = CreateAdapterFromFile(); + TestRunWithLoraAdapter(adapter); +} + +TEST(CApi, RunWithLoraAdapterFromArray) { + auto adapter = CreateAdapterFromArray(); + TestRunWithLoraAdapter(adapter); +} + TEST(CApiTest, RunBaseLoraModel) { constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx"); Ort::Env env(ORT_LOGGING_LEVEL_WARNING); From dd77ecaa26c48049e00bc58d9bcd311527b01fbe Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 27 Sep 2024 12:26:44 -0700 Subject: [PATCH 78/84] Do not ignore status --- include/onnxruntime/core/session/onnxruntime_cxx_inline.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index c909b3c1dc85a..cecdfac7724c4 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -567,7 +567,7 @@ inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string Date: Fri, 27 Sep 2024 13:38:39 -0700 Subject: [PATCH 79/84] Fix SAL annotation --- onnxruntime/core/session/lora_adapters.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index 3806eb3f14586..d320275dfe1dd 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -141,7 +141,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_fi API_IMPL_END } -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, const uint8_t* bytes, size_t num_bytes, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN From e1224563dcda2ee648ae29d26a0820a0c60c5606 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 28 Sep 2024 15:03:18 -0700 Subject: [PATCH 80/84] Address review comments, adjust tests, enforce absolute path for adapter file. --- .../core/session/onnxruntime_c_api.h | 6 +- .../core/session/onnxruntime_cxx_api.h | 2 +- .../core/session/onnxruntime_cxx_inline.h | 2 +- onnxruntime/core/session/lora_adapters.cc | 18 ++++-- onnxruntime/core/session/onnxruntime_c_api.cc | 5 +- onnxruntime/core/session/ort_apis.h | 2 +- onnxruntime/lora/adapter_format/README.md | 2 +- onnxruntime/lora/adapter_format_utils.cc | 8 --- onnxruntime/lora/adapter_format_utils.h | 4 -- onnxruntime/lora/adapter_format_version.h | 2 +- .../python/onnxruntime_pybind_ortvalue.cc | 2 +- .../python/onnxruntime_pybind_state.cc | 58 ++++++++++++------- onnxruntime/test/lora/lora_test.cc | 17 ++++-- .../test/python/onnxruntime_test_python.py | 14 +++-- onnxruntime/test/shared_lib/test_inference.cc | 4 +- 15 files changed, 83 insertions(+), 63 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index c24f29fd9dcb1..a2471ff1e6db9 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4702,18 +4702,18 @@ struct OrtApi { * \param[out] 
out A pointer to a newly created OrtLoraAdapter instance. Must be released with * OrtApi::ReleaseLoraAdapter. */ - ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, + ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter */ ORT_CLASS_RELEASE(LoraAdapter); - /** \brief Set the active Lora Adapter for the run options + /** \brief Add the Lora Adapter to the list of active adapters. * * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with * OrtApi::CreateLoraAdapter or FromArray. The Lora Adapter will be used by the session to run the model. - * The instance of the OrtRunOptions will can then be then used to customize the Run() calls. + * The instance of the OrtRunOptions can then be used to customize the Run() calls. * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different * Lora adapters that will be active at the same time must not overlap. * This setting does not affect RunWithBinding. diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 80aa4550123f7..df8675d05c03a 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -759,7 +759,7 @@ struct LoraAdapter : detail::Base { /// \param num_bytes The number of bytes in the byte array /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still /// be copied to device if required by the model at inference time. 
- static LoraAdapter CreateLoraAdapterFromArray(const uint8_t* bytes, size_t num_bytes, + static LoraAdapter CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes, OrtAllocator* allocator); }; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index cecdfac7724c4..0f6d84eaed716 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -564,7 +564,7 @@ inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string buffer) { } void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { + CheckPathIsAbsolute(file_path); auto [mapped_memory, file_size] = adapters::utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); adapter_ = adapters::utils::ValidateAndGetAdapterFromBytes(u8_span); buffer_.emplace(std::move(mapped_memory), file_size); - InitializeParamsValues(); } @@ -97,6 +104,7 @@ void LoraAdapter::InitializeParamsValues() { } const auto* params = adapter_->parameters(); + ORT_ENFORCE(params != nullptr, "Params absent"); std::unordered_map params_values; params_values.reserve(params->size()); // Re-work in two separate loops due to compiler issues @@ -141,7 +149,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_fi API_IMPL_END } -ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, +ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** adapter) { API_IMPL_BEGIN @@ -153,10 +161,8 @@ ORT_API_STATUS_IMPL(OrtApis::CreateLoraAdapterFromArray, _In_ const uint8_t* byt lora_adapter = std::make_unique(); } - auto span = gsl::make_span(bytes, num_bytes); - std::vector buffer; - buffer.reserve(num_bytes); - buffer.assign(span.begin(), 
span.end()); + std::vector buffer(num_bytes); + memcpy(buffer.data(), bytes, num_bytes); lora_adapter->Load(std::move(buffer)); *adapter = reinterpret_cast(lora_adapter.release()); return nullptr; diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index a490cb8cb1a97..64546e634694f 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -39,7 +39,7 @@ #include "core/platform/ort_mutex.h" #include "core/common/string_helper.h" -#include "lora_adapters.h" +#include "core/session/lora_adapters.h" #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" @@ -933,7 +933,8 @@ ORT_API_STATUS_IMPL(OrtApis::RunWithBinding, _Inout_ OrtSession* sess, _In_ cons status = session->Run(default_run_options, *binding_ptr->binding_); } else { if (!run_options->active_adapters.empty()) { - LOGS(*session->GetLogger(), WARNING) << "RunWithBinding() active adapters specified, but won't have effect"; + LOGS(*session->GetLogger(), WARNING) + << "RunWithBinding() has active adapters specified, but won't have an effect"; } status = session->Run(*run_options, *binding_ptr->binding_); } diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index ab7d463819712..d3c5d926f6f3d 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -527,7 +527,7 @@ ORT_API_STATUS_IMPL(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ ORT_API_STATUS_IMPL(CreateLoraAdapter, _In_ const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); -ORT_API_STATUS_IMPL(CreateLoraAdapterFromArray, _In_ const uint8_t* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, +ORT_API_STATUS_IMPL(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ 
OrtLoraAdapter*); ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); diff --git a/onnxruntime/lora/adapter_format/README.md b/onnxruntime/lora/adapter_format/README.md index 2bb107584d9ff..65011c93729f8 100644 --- a/onnxruntime/lora/adapter_format/README.md +++ b/onnxruntime/lora/adapter_format/README.md @@ -2,7 +2,7 @@ This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the Lora Parameters file format. This file format is defined as a means to deliver Lora parameters so it can read by ONNXRuntime C++ code. -The format is generally designed to house a single Lora adapter named Lora parameters. +The format is generally designed to house a single Lora adapter with named Lora parameters. [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library. 
diff --git a/onnxruntime/lora/adapter_format_utils.cc b/onnxruntime/lora/adapter_format_utils.cc index f9570376a8386..9a6f8f3b7b1c8 100644 --- a/onnxruntime/lora/adapter_format_utils.cc +++ b/onnxruntime/lora/adapter_format_utils.cc @@ -23,14 +23,6 @@ bool IsAdapterFormatModelBytes(const void* bytes, size_t num_bytes) { AdapterBufferHasIdentifier(bytes); } -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src) { - if (has_string) return builder.CreateString(src); - - // If the string does not exist, return 0 (the string does not exist in flatbuffer) - return 0; -} - void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) { if (fbs_string) { dst = fbs_string->str(); diff --git a/onnxruntime/lora/adapter_format_utils.h b/onnxruntime/lora/adapter_format_utils.h index 576872b86feaf..21a68e6846ac1 100644 --- a/onnxruntime/lora/adapter_format_utils.h +++ b/onnxruntime/lora/adapter_format_utils.h @@ -71,10 +71,6 @@ class AdapterFormatBuilder { /// bool IsAdapterFormatModelBytes(const void* bytes, size_t num_bytes); -// Will only create string in flatbuffers when has_string is true -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src); - void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string); /// diff --git a/onnxruntime/lora/adapter_format_version.h b/onnxruntime/lora/adapter_format_version.h index 73b1ba34b31f2..e7cfc781d2e95 100644 --- a/onnxruntime/lora/adapter_format_version.h +++ b/onnxruntime/lora/adapter_format_version.h @@ -12,7 +12,7 @@ namespace adapters { // The current model versions for saving lora parameters in flatbuffers format. // Once this version is updated, the kSupportedAdapterFormatVersions in IsAdapterFormatVersionSupported // below will also need to be updated. -// See src/flatbuffers/schema/README.md for more details on versioning. 
+// See onnxruntime/lora/adapter_format/README.md for more details on versioning. // Version 1 - history begins constexpr const int kAdapterFormatVersion = 1; diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 4b9b4c61e820c..e338634d73bd3 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -154,7 +154,7 @@ void addOrtValueMethods(pybind11::module& m) { const auto element_size = element_type->Size(); if (narrow(data.itemsize()) != element_size) { - ORT_THROW("Items size in the incoming aray: ", data.itemsize(), + ORT_THROW("Items size in the incoming array: ", data.itemsize(), " specified by onnxtype: ", element_size); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 66c912c8f24ea..5ac9c149bbe80 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -55,10 +55,6 @@ // (This static var is referenced in GetCudaToHostMemCpyFunction()) const OrtDevice::DeviceType OrtDevice::GPU; -namespace onnxruntime { - -} // namespace onnxruntime - #if defined(_MSC_VER) #pragma warning(disable : 4267 4996 4503) #endif // _MSC_VER @@ -170,6 +166,24 @@ void AsyncCallback(void* user_data, OrtValue** outputs, size_t num_outputs, OrtS } } +void AppendLoraParametersAsInputs(const RunOptions& run_options, + size_t total_entries, + NameMLValMap& feeds) { + for (const auto* adapter : run_options.active_adapters) { + total_entries += adapter->GetParamNum(); + } + feeds.reserve(total_entries + feeds.size()); + + // Append necessary inputs for active adapters + for (const auto* adapter : run_options.active_adapters) { + auto [begin, end] = adapter->GetParamIterators(); + for (; begin != end; ++begin) { + const auto& [name, param] = *begin; + feeds.insert(std::make_pair(name, param.GetMapped())); + } + } +} + template static 
py::object AddNonTensor(const OrtValue& val, const DataTransferManager* /*data_transfer_manager*/, @@ -1905,10 +1919,10 @@ RunOptions instance. The individual calls will exit gracefully and return an err }, R"pbdoc(Get a single run configuration value using the given configuration key.)pbdoc") .def( - "set_adapter_active", [](RunOptions* options, lora::LoraAdapter* adapter) { + "add_active_adapter", [](RunOptions* options, lora::LoraAdapter* adapter) { options->active_adapters.push_back(adapter); }, - R"pbdoc(Activates the specified lora adapter)pbdoc"); + R"pbdoc(Adds specified adapter as an active adapter)pbdoc"); py::class_(m, "ModelMetadata", R"pbdoc(Pre-defined and custom metadata about the model. It is usually used to identify the model used to run the prediction and @@ -2030,20 +2044,7 @@ including arg name, arg type (contains both type and shape).)pbdoc") -> py::list { NameMLValMap feeds; if (run_options != nullptr && !run_options->active_adapters.empty()) { - size_t total_entries = pyfeeds.size(); - for (const auto* adapter : run_options->active_adapters) { - total_entries += adapter->GetParamNum(); - } - feeds.reserve(total_entries); - - // Append necessary inputs for active adapters - for (const auto* adapter : run_options->active_adapters) { - auto [begin, end] = adapter->GetParamIterators(); - for (; begin != end; ++begin) { - const auto& [name, param] = *begin; - feeds.insert(std::make_pair(name, param.GetMapped())); - } - } + AppendLoraParametersAsInputs(*run_options, pyfeeds.size(), feeds); } else { feeds.reserve(pyfeeds.size()); } @@ -2104,6 +2105,11 @@ including arg name, arg type (contains both type and shape).)pbdoc") PyCallback callback, py::object user_data = {}, RunOptions* run_options = nullptr) -> void { + if (run_options != nullptr && !run_options->active_adapters.empty()) { + LOGS(*sess->GetSessionHandle()->GetLogger(), WARNING) + << "run_async has active adapters specified, but won't have an effect"; + } + std::unique_ptr async_resource 
= std::make_unique(); async_resource->callback = callback; async_resource->user_data = user_data; @@ -2149,7 +2155,12 @@ including arg name, arg type (contains both type and shape).)pbdoc") /// a Tensor, SparseTensor or a TensorSequence. .def("run_with_ort_values", [](PyInferenceSession* sess, const py::dict& feeds, const std::vector& output_names, RunOptions* run_options = nullptr) -> std::vector { NameMLValMap ort_feeds; - ort_feeds.reserve(feeds.size()); + if (run_options != nullptr && !run_options->active_adapters.empty()) { + AppendLoraParametersAsInputs(*run_options, feeds.size(), ort_feeds); + } else { + ort_feeds.reserve(feeds.size()); + } + // item is always a copy since dict returns a value and not a ref // and Apple XToolChain barks for (const auto& item : feeds) { @@ -2172,6 +2183,11 @@ including arg name, arg type (contains both type and shape).)pbdoc") return fetches; }) .def("run_with_ortvaluevector", [](PyInferenceSession* sess, RunOptions run_options, const std::vector& feed_names, const std::vector& feeds, const std::vector& fetch_names, std::vector& fetches, const std::vector& fetch_devices) -> void { + if (!run_options.active_adapters.empty()) { + LOGS(*sess->GetSessionHandle()->GetLogger(), WARNING) + << "run_with_ortvaluevector has active adapters specified, but won't have an effect"; + } + // release GIL to allow multiple python threads to invoke Run() in parallel. 
py::gil_scoped_release release; OrtPybindThrowIfError(sess->GetSessionHandle()->Run(run_options, feed_names, feeds, fetch_names, &fetches, &fetch_devices)); diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index c2aede3e00875..e8291a36447ca 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -180,13 +180,20 @@ TEST(LoraAdapterTest, Load) { // Test different data types const auto data_types = gsl::make_span(adapters::EnumValuesTensorDataType()); for (size_t i = 1, size = data_types.size(); i < size; ++i) { - if (i == 8 || i == 9 || i == 14 || i == 15 || (i > 16 && i < 21)) + const auto dt = data_types[i]; + + using namespace adapters; + if (dt == TensorDataType::STRING || + dt == TensorDataType::BOOL || + dt == TensorDataType::COMPLEX64 || + dt == TensorDataType::COMPLEX128 || + static_cast(dt) >= static_cast(TensorDataType::BFLOAT16)) continue; - utils::MLTypeCallDispatcher + onnxruntime::utils::MLTypeCallDispatcher disp(static_cast(data_types[i])); disp.Invoke(); } diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 3ce1956f1734b..9419761340517 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -1834,7 +1834,8 @@ def test_multiple_devices(self): def test_adater_export_read(self): adapter_version = 1 model_version = 1 - exported_adapter_file = "test_adapter.onnx_adapter" + file_path = pathlib.Path(os.path.realpath(__file__)).parent + file_path = str(file_path / "test_adapter.onnx_adapter") float_data_type = 1 int64_data_type = 7 @@ -1852,10 +1853,10 @@ def test_adater_export_read(self): adapter_format.set_model_version(model_version) adapter_format.set_parameters(params) - adapter_format.export_adapter(exported_adapter_file) + adapter_format.export_adapter(file_path) - adapter_format_read = onnxrt.AdapterFormat.read_adapter(exported_adapter_file) - 
os.remove(exported_adapter_file) + adapter_format_read = onnxrt.AdapterFormat.read_adapter(file_path) + os.remove(file_path) self.assertEqual(adapter_version, adapter_format_read.get_adapter_version()) self.assertEqual(model_version, adapter_format_read.get_model_version()) @@ -1871,8 +1872,9 @@ def test_adater_export_read(self): np.testing.assert_allclose(expected_val.numpy(), value.numpy()) def test_run_with_adapter(self): - adapter_path = get_name("lora/two_params_lora_model.onnx_adapter") model_path = get_name("lora/two_params_lora_model.onnx") + file_path = os.getcwd() + "/" + get_name("lora/two_params_lora_model.onnx_adapter") + adapter_path = os.path.abspath(file_path) expected_output = np.array( [ @@ -1888,7 +1890,7 @@ def test_run_with_adapter(self): adapter.Load(adapter_path) run_options = onnxrt.RunOptions() - run_options.set_adapter_active(adapter) + run_options.add_active_adapter(adapter) session = onnxrt.InferenceSession(model_path) inputs = {"input_x": np.ones((4, 4), dtype=np.float32)} diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index b36a2bfe94cd2..782992e90bd39 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4467,12 +4467,12 @@ static Ort::LoraAdapter CreateAdapterFromArray() { return Ort::LoraAdapter::CreateLoraAdapterFromArray(buffer.data(), buffer.size(), nullptr); } -TEST(CApi, RunWithLoraAdapterFromFile) { +TEST(CApiTest, RunWithLoraAdapterFromFile) { auto adapter = CreateAdapterFromFile(); TestRunWithLoraAdapter(adapter); } -TEST(CApi, RunWithLoraAdapterFromArray) { +TEST(CApiTest, RunWithLoraAdapterFromArray) { auto adapter = CreateAdapterFromArray(); TestRunWithLoraAdapter(adapter); } From d6e71b67fc8de58db86553891971018020922bb5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 28 Sep 2024 15:42:06 -0700 Subject: [PATCH 81/84] Fix test path to absolute --- onnxruntime/test/shared_lib/test_inference.cc | 
3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 782992e90bd39..d880015cad68b 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4448,7 +4448,8 @@ static void TestRunWithLoraAdapter(const Ort::LoraAdapter& adapter) { static Ort::LoraAdapter CreateAdapterFromFile() { constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); - return Ort::LoraAdapter::CreateLoraAdapter(adapter_path, nullptr); + const auto adapter_abs_path = std::filesystem::absolute(adapter_path); + return Ort::LoraAdapter::CreateLoraAdapter(adapter_abs_path, nullptr); } static Ort::LoraAdapter CreateAdapterFromArray() { From cfd271451949528fd7ad6ccddf98cf4e0432008b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 28 Sep 2024 16:32:00 -0700 Subject: [PATCH 82/84] Replace absolute() with current_path() + relative --- onnxruntime/test/shared_lib/test_inference.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index d880015cad68b..b4132e0909dea 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4448,7 +4448,8 @@ static void TestRunWithLoraAdapter(const Ort::LoraAdapter& adapter) { static Ort::LoraAdapter CreateAdapterFromFile() { constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); - const auto adapter_abs_path = std::filesystem::absolute(adapter_path); + // std::filesystem::absolute is not supported by all compilers + auto adapter_abs_path = std::filesystem::current_path().append(adapter_path); return Ort::LoraAdapter::CreateLoraAdapter(adapter_abs_path, nullptr); } From b34f75f9537723677232ff6828ce2dd1647c5d22 Mon Sep 17 00:00:00 2001 From: Dmitri 
Smirnov Date: Sat, 28 Sep 2024 17:52:33 -0700 Subject: [PATCH 83/84] remove requirements for absolute path --- include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- include/onnxruntime/core/session/onnxruntime_cxx_api.h | 4 ++-- include/onnxruntime/core/session/onnxruntime_cxx_inline.h | 4 ++-- onnxruntime/core/session/lora_adapters.cc | 8 -------- onnxruntime/test/shared_lib/test_inference.cc | 4 +--- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index a2471ff1e6db9..8237ac4220f24 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4679,7 +4679,7 @@ struct OrtApi { * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless * the platform does not support memory mapping, in which case the file will be read into memory. * - * \param[in] adapter_file_path Absolute file path to the adapter file. + * \param[in] adapter_file_path adapter file path. * \param[in] allocator optional pointer to a device allocator. If specified * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU. * The data would still be copied to device if required by the model at inference time. 
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index df8675d05c03a..12a6a5c87c0aa 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -746,10 +746,10 @@ struct LoraAdapter : detail::Base { /// \brief Wraps OrtApi::CreateLoraAdapter /// /// The function attempts to load the adapter from the specified file - /// \param absolute_adapter_path The absolute path to the Lora adapter + /// \param adapter_path The path to the Lora adapter /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still /// be copied to device if required by the model at inference time. - static LoraAdapter CreateLoraAdapter(const std::basic_string& absolute_adapter_path, + static LoraAdapter CreateLoraAdapter(const std::basic_string& adapter_path, OrtAllocator* allocator); /// \brief Wraps OrtApi::CreateLoraAdapterFromArray diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 0f6d84eaed716..7401cb2438121 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -557,10 +557,10 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) { ThrowOnError(GetApi().CustomOpDomain_Add(p_, op)); } -inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string& absolute_adapter_path, +inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string& adapter_path, OrtAllocator* allocator) { OrtLoraAdapter* p; - ThrowOnError(GetApi().CreateLoraAdapter(absolute_adapter_path.c_str(), allocator, &p)); + ThrowOnError(GetApi().CreateLoraAdapter(adapter_path.c_str(), allocator, &p)); return LoraAdapter{p}; } diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index 
fef3c266b7d45..466edce187a56 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -31,14 +31,7 @@ LoraAdapter::Param::Param(OrtValue ort_value_mapped, OrtValue ort_value_device) : ort_value_mapped_(std::move(ort_value_mapped)), ort_value_device_(std::move(ort_value_device)) { } -static void CheckPathIsAbsolute(const std::filesystem::path& file_path) { - if (!file_path.is_absolute()) { - ORT_THROW("Expecting an absolute path: ", file_path); - } -} - void LoraAdapter::Load(const std::filesystem::path& file_path) { - CheckPathIsAbsolute(file_path); auto buffer = adapters::utils::LoadLoraAdapterBytes(file_path); Load(std::move(buffer)); } @@ -50,7 +43,6 @@ void LoraAdapter::Load(std::vector buffer) { } void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { - CheckPathIsAbsolute(file_path); auto [mapped_memory, file_size] = adapters::utils::MemoryMapAdapterFile(file_path); auto u8_span = ReinterpretAsSpan(gsl::make_span(mapped_memory.get(), file_size)); adapter_ = adapters::utils::ValidateAndGetAdapterFromBytes(u8_span); diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index b4132e0909dea..782992e90bd39 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -4448,9 +4448,7 @@ static void TestRunWithLoraAdapter(const Ort::LoraAdapter& adapter) { static Ort::LoraAdapter CreateAdapterFromFile() { constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter"); - // std::filesystem::absolute is not supported by all compilers - auto adapter_abs_path = std::filesystem::current_path().append(adapter_path); - return Ort::LoraAdapter::CreateLoraAdapter(adapter_abs_path, nullptr); + return Ort::LoraAdapter::CreateLoraAdapter(adapter_path, nullptr); } static Ort::LoraAdapter CreateAdapterFromArray() { From b1120e78709fe4dfecbfd9a953f20ea7661f8eca Mon Sep 17 
00:00:00 2001 From: Dmitri Smirnov Date: Mon, 30 Sep 2024 11:04:50 -0700 Subject: [PATCH 84/84] Remove proto duplicate --- onnxruntime/core/session/ort_apis.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index d3c5d926f6f3d..9054246873232 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -118,7 +118,6 @@ ORT_API_STATUS_IMPL(RunOptionsGetRunTag, _In_ const OrtRunOptions*, _Out_ const ORT_API_STATUS_IMPL(RunOptionsSetTerminate, _Inout_ OrtRunOptions* options); ORT_API_STATUS_IMPL(RunOptionsUnsetTerminate, _Inout_ OrtRunOptions* options); -ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter*); ORT_API_STATUS_IMPL(CreateTensorAsOrtValue, _Inout_ OrtAllocator* allocator, _In_ const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type,