Skip to content

Commit

Permalink
Accommodate BE platforms. Make sure we always write flatbuffers LE (#2…
Browse files Browse the repository at this point in the history
…2375)

### Description
<!-- Describe your changes. -->
flatbuffers always writes data in LE, and it is automatically translated
to/from BE as needed,
but only if we use the proper accessors. This works for the shape.
However, we store parameters as bytes, so we need to swap bytes as
needed for BE.

### Motivation and Context
Address #22364
  • Loading branch information
yuslepukhin authored Oct 11, 2024
1 parent c06ecd4 commit f1f3d94
Showing 1 changed file with 85 additions and 10 deletions.
95 changes: 85 additions & 10 deletions onnxruntime/lora/adapter_format_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "core/framework/allocator.h"
#include "core/common/common.h"
#include "core/framework/endian.h"
#include "core/framework/endian_utils.h"
#include "core/common/span_utils.h"
#include "core/framework/ortdevice.h"
#include "core/framework/ortmemoryinfo.h"
Expand Down Expand Up @@ -75,35 +77,108 @@ const Adapter* ValidateAndGetAdapterFromBytes(gsl::span<const uint8_t> bytes) {
return adapter;
}

// Dispatch functor for MLTypeCallDispatcher, used when serializing on a
// big-endian host: reinterprets the raw source bytes as elements of T and
// writes them into dest in little-endian order (the flatbuffers wire order).
template <class T>
struct WriteDataForLittleEndian {
  // src:  raw parameter bytes in native (big-endian) order
  // dest: destination buffer of the same byte size, receives LE bytes
  Status operator()(gsl::span<const uint8_t> src, gsl::span<unsigned char> dest) const {
    auto src_span = ReinterpretAsSpan<const T>(src);
    return onnxruntime::utils::WriteLittleEndian<T>(src_span, dest);
  }
};

// Serializes a single LoRA parameter into the flatbuffer being built.
// flatbuffers store data little-endian on the wire; the shape vector is
// converted automatically by flatbuffers accessors, but the raw byte payload
// must be byte-swapped per element on big-endian hosts before writing.
// On return, fbs_tensor holds the offset of the created Parameter table.
void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
                       TensorDataType data_type, gsl::span<const int64_t> shape,
                       gsl::span<const uint8_t> data,
                       flatbuffers::Offset<Parameter>& fbs_tensor) {
  auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size());
  auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size());

  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_vec;
  if constexpr (endian::native == endian::big) {
    const auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast<int32_t>(data_type))->GetElementType();
    if (elem_type->Size() > 1) {
      // Multi-byte elements: swap each element to little-endian into a
      // temporary buffer before storing.
      InlinedVector<uint8_t> be_data(data.size());
      auto be_data_span = ReinterpretAsSpan<unsigned char>(AsSpan(be_data));

      onnxruntime::utils::MLTypeCallDispatcher<float, double,
                                               int16_t, uint16_t, int32_t, uint32_t,
                                               int64_t, uint64_t,
                                               BFloat16, MLFloat16>
          disp(static_cast<int32_t>(data_type));

      ORT_THROW_IF_ERROR((disp.InvokeRet<Status, WriteDataForLittleEndian>(data, be_data_span)));
      data_vec = flat_builder.CreateVector<uint8_t>(be_data.data(), be_data.size());
    } else {
      // Single-byte elements have no endianness concerns.
      data_vec = flat_builder.CreateVector(data.data(), data.size());
    }
  } else {
    // Little-endian host: bytes are already in wire order.
    data_vec = flat_builder.CreateVector(data.data(), data.size());
  }
  fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec);
}

// Dispatch functor for MLTypeCallDispatcher, used when loading on a
// big-endian host: reads little-endian wire bytes from src and stores
// native-order T values into the destination tensor's buffer.
template <class T>
struct ReadDataForBigEndian {
  // src: little-endian bytes from the flatbuffer
  // dst: pre-allocated tensor whose element type is T
  Status operator()(gsl::span<const unsigned char> src, Tensor& dst) const {
    auto dst_span = dst.MutableDataAsSpan<T>();
    return onnxruntime::utils::ReadLittleEndian<T>(src, dst_span);
  }
};

// Big-endian path: allocate fresh CPU memory inside a Tensor and copy the
// parameter's little-endian payload into it, swapping bytes per element.
[[maybe_unused]] static Status CreateOrtValueForBePlatforms(const Parameter& param, const MLDataType elem_type,
                                                            gsl::span<const int64_t> shape, OrtValue& result) {
  static const AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();

  // Destination tensor owns the newly allocated native-order buffer.
  Tensor swapped(elem_type, shape, cpu_allocator);

  auto raw_bytes = ReinterpretAsSpan<const unsigned char>(
      gsl::make_span<const uint8_t>(param.raw_data()->data(), param.raw_data()->size()));

  const auto data_type = param.data_type();

  onnxruntime::utils::MLTypeCallDispatcher<float, double,
                                           int16_t, uint16_t, int32_t, uint32_t,
                                           int64_t, uint64_t,
                                           BFloat16, MLFloat16>
      dispatcher(static_cast<int32_t>(data_type));

  ORT_RETURN_IF_ERROR((dispatcher.InvokeRet<Status, ReadDataForBigEndian>(raw_bytes, swapped)));
  Tensor::InitOrtValue(std::move(swapped), result);
  return Status::OK();
}

// Creates an OrtValue for a LoRA parameter read from the flatbuffer.
// On little-endian hosts (and for single-byte element types everywhere) the
// OrtValue aliases the flatbuffer memory directly, so the adapter bytes must
// outlive the returned value. On big-endian hosts with multi-byte elements,
// the data is copied into freshly allocated memory with bytes swapped.
// Returns the parameter name paired with the resulting OrtValue.
std::pair<std::string, OrtValue> CreateOrtValueOverLoraParameter(const Parameter& param) {
  OrtValue result;

  std::string name;
  LoadStringFromLoraFormat(name, param.name());

  const auto data_type = param.data_type();
  // Copying the shape takes care of endianness using flatbuffers accessors.
  TensorShapeVector shape(param.dims()->begin(), param.dims()->end());
  const auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast<int32_t>(data_type))->GetElementType();
  static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator);

  if constexpr (endian::native == endian::big) {
    if (elem_type->Size() > 1) {
      // Multi-byte elements require a byte-swapped copy.
      ORT_THROW_IF_ERROR(CreateOrtValueForBePlatforms(param, elem_type, shape, result));
    } else {
      // Single byte elements allow us to create OrtValue directly on top
      // of raw data.
      // const_cast is necessary due to Tensor class API.
      Tensor::InitOrtValue(elem_type,
                           TensorShape(shape),
                           const_cast<uint8_t*>(param.raw_data()->data()),
                           cpu_meminfo,
                           result);
    }
  } else {
    // const_cast is necessary due to Tensor class API.
    Tensor::InitOrtValue(elem_type,
                         TensorShape(shape),
                         const_cast<uint8_t*>(param.raw_data()->data()),
                         cpu_meminfo,
                         result);
  }

  return std::make_pair(std::move(name), std::move(result));
}
Expand Down

0 comments on commit f1f3d94

Please sign in to comment.