Skip to content

Commit

Permalink
Accommodate BE platforms. Make sure we always write flatbuffers LE (#2…
Browse files Browse the repository at this point in the history
…2375)

### Description
<!-- Describe your changes. -->
flatbuffers always writes data in LE, and it is automatically translated
to/from BE as needed,
but only if we use the proper accessors. This works for the shape.
However, we store parameters as bytes, so we need to swap bytes as
needed for BE.

### Motivation and Context
Address #22364
  • Loading branch information
yuslepukhin authored Oct 11, 2024
1 parent c06ecd4 commit f1f3d94
Showing 1 changed file with 85 additions and 10 deletions.
95 changes: 85 additions & 10 deletions onnxruntime/lora/adapter_format_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "core/framework/allocator.h"
#include "core/common/common.h"
#include "core/framework/endian.h"
#include "core/framework/endian_utils.h"
#include "core/common/span_utils.h"
#include "core/framework/ortdevice.h"
#include "core/framework/ortmemoryinfo.h"
Expand Down Expand Up @@ -75,35 +77,108 @@ const Adapter* ValidateAndGetAdapterFromBytes(gsl::span<const uint8_t> bytes) {
return adapter;
}

// Dispatch functor for MLTypeCallDispatcher, used when serializing on a
// big-endian host: reinterprets the raw source bytes as elements of T and
// writes them into dest in little-endian order (the flatbuffers wire order).
template <class T>
struct WriteDataForLittleEndian {
  // src:  raw parameter bytes in native (big-endian) order
  // dest: destination buffer of the same byte size, receives LE bytes
  Status operator()(gsl::span<const uint8_t> src, gsl::span<unsigned char> dest) const {
    auto src_span = ReinterpretAsSpan<const T>(src);
    return onnxruntime::utils::WriteLittleEndian<T>(src_span, dest);
  }
};

// Serializes a single LoRA parameter into the flatbuffer being built.
// flatbuffers store data little-endian on the wire; the shape vector is
// converted automatically by flatbuffers accessors, but the raw byte payload
// must be byte-swapped per element on big-endian hosts before writing.
// On return, fbs_tensor holds the offset of the created Parameter table.
void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
                       TensorDataType data_type, gsl::span<const int64_t> shape,
                       gsl::span<const uint8_t> data,
                       flatbuffers::Offset<Parameter>& fbs_tensor) {
  auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size());
  auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size());

  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_vec;
  if constexpr (endian::native == endian::big) {
    const auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast<int32_t>(data_type))->GetElementType();
    if (elem_type->Size() > 1) {
      // Multi-byte elements: swap each element to little-endian into a
      // temporary buffer before storing.
      InlinedVector<uint8_t> be_data(data.size());
      auto be_data_span = ReinterpretAsSpan<unsigned char>(AsSpan(be_data));

      onnxruntime::utils::MLTypeCallDispatcher<float, double,
                                               int16_t, uint16_t, int32_t, uint32_t,
                                               int64_t, uint64_t,
                                               BFloat16, MLFloat16>
          disp(static_cast<int32_t>(data_type));

      ORT_THROW_IF_ERROR((disp.InvokeRet<Status, WriteDataForLittleEndian>(data, be_data_span)));
      data_vec = flat_builder.CreateVector<uint8_t>(be_data.data(), be_data.size());
    } else {
      // Single-byte elements have no endianness concerns.
      data_vec = flat_builder.CreateVector(data.data(), data.size());
    }
  } else {
    // Little-endian host: bytes are already in wire order.
    data_vec = flat_builder.CreateVector(data.data(), data.size());
  }
  fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec);
}

// Dispatch functor for MLTypeCallDispatcher, used when loading on a
// big-endian host: reads little-endian wire bytes from src and stores
// native-order T values into the destination tensor's buffer.
template <class T>
struct ReadDataForBigEndian {
  // src: little-endian bytes from the flatbuffer
  // dst: pre-allocated tensor whose element type is T
  Status operator()(gsl::span<const unsigned char> src, Tensor& dst) const {
    auto dst_span = dst.MutableDataAsSpan<T>();
    return onnxruntime::utils::ReadLittleEndian<T>(src, dst_span);
  }
};

// Big-endian path: allocate fresh CPU memory inside a Tensor and copy the
// parameter's little-endian payload into it, swapping bytes per element.
[[maybe_unused]] static Status CreateOrtValueForBePlatforms(const Parameter& param, const MLDataType elem_type,
                                                            gsl::span<const int64_t> shape, OrtValue& result) {
  static const AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();

  // Destination tensor owns the newly allocated native-order buffer.
  Tensor swapped(elem_type, shape, cpu_allocator);

  auto raw_bytes = ReinterpretAsSpan<const unsigned char>(
      gsl::make_span<const uint8_t>(param.raw_data()->data(), param.raw_data()->size()));

  const auto data_type = param.data_type();

  onnxruntime::utils::MLTypeCallDispatcher<float, double,
                                           int16_t, uint16_t, int32_t, uint32_t,
                                           int64_t, uint64_t,
                                           BFloat16, MLFloat16>
      dispatcher(static_cast<int32_t>(data_type));

  ORT_RETURN_IF_ERROR((dispatcher.InvokeRet<Status, ReadDataForBigEndian>(raw_bytes, swapped)));
  Tensor::InitOrtValue(std::move(swapped), result);
  return Status::OK();
}

// Creates an OrtValue for a LoRA parameter read from the flatbuffer.
// On little-endian hosts (and for single-byte element types everywhere) the
// OrtValue aliases the flatbuffer memory directly, so the adapter bytes must
// outlive the returned value. On big-endian hosts with multi-byte elements,
// the data is copied into freshly allocated memory with bytes swapped.
// Returns the parameter name paired with the resulting OrtValue.
std::pair<std::string, OrtValue> CreateOrtValueOverLoraParameter(const Parameter& param) {
  OrtValue result;

  std::string name;
  LoadStringFromLoraFormat(name, param.name());

  const auto data_type = param.data_type();
  // Copying the shape takes care of endianness using flatbuffers accessors.
  TensorShapeVector shape(param.dims()->begin(), param.dims()->end());
  const auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast<int32_t>(data_type))->GetElementType();
  static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator);

  if constexpr (endian::native == endian::big) {
    if (elem_type->Size() > 1) {
      // Multi-byte elements require a byte-swapped copy.
      ORT_THROW_IF_ERROR(CreateOrtValueForBePlatforms(param, elem_type, shape, result));
    } else {
      // Single byte elements allow us to create OrtValue directly on top
      // of raw data.
      // const_cast is necessary due to Tensor class API.
      Tensor::InitOrtValue(elem_type,
                           TensorShape(shape),
                           const_cast<uint8_t*>(param.raw_data()->data()),
                           cpu_meminfo,
                           result);
    }
  } else {
    // const_cast is necessary due to Tensor class API.
    Tensor::InitOrtValue(elem_type,
                         TensorShape(shape),
                         const_cast<uint8_t*>(param.raw_data()->data()),
                         cpu_meminfo,
                         result);
  }

  return std::make_pair(std::move(name), std::move(result));
}
Expand Down

0 comments on commit f1f3d94

Please sign in to comment.