Revert Implement DML copy for Lora Adapters (microsoft#22814)
zhangxiang1993 authored and ankitm3k committed Dec 11, 2024
1 parent 9156269 commit 633cdcc
Showing 2 changed files with 2 additions and 79 deletions.
24 changes: 0 additions & 24 deletions onnxruntime/core/session/lora_adapters.cc
@@ -52,19 +52,6 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) {

 static std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtMemoryInfo& mem_info) {
   std::unique_ptr<IDataTransfer> data_transfer;
-  bool is_dml = false;
-  Status CopyTensor(const Tensor& src, Tensor& dst) const {
-    return data_transfer->CopyTensor(src, dst);
-  }
-  Status Sync() const {
-    if (is_dml) {
-      return ep->Sync();
-    } else {
-      return Status::OK();
-    }
-  }
-};
-}  // namespace
 
   if (strcmp(mem_info.name, onnxruntime::CPU) == 0) {
     return data_transfer;
@@ -77,17 +64,6 @@ static std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtMemoryInfo& mem_info) {
       data_transfer = cuda_provider_info->CreateGPUDataTransfer();
     }
 #endif
-  } else if (strcmp(mem_info.name, onnxruntime::DML) == 0) {
-#ifdef USE_DML
-    auto ep_factory = onnxruntime::DMLProviderFactoryCreator::Create(ConfigOptions{}, 0, false, false, false);
-    dt.ep = ep_factory->CreateProvider();
-    dt.is_dml = true;
-    dt.data_transfer = dt.ep->GetDataTransfer();
-#else
-    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "DML provider is not enabled in this build");
-#endif
-  } else {
-    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported device allocator");
   }
 
   return data_transfer;
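For reference, here is a minimal sketch of how GetDataTransfer likely reads once this revert lands, reassembled from the unchanged context lines of the hunks above. The CUDA branch condition and the closing lines are assumptions where the diff does not show them; this is not a verified copy of the post-revert file.

// Post-revert shape of GetDataTransfer (sketch): only CPU and CUDA remain;
// the DML branch and the is_dml/Sync wrapper are deleted by this commit.
static std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtMemoryInfo& mem_info) {
  std::unique_ptr<IDataTransfer> data_transfer;

  // CPU tensors need no device copy; callers treat nullptr as "no transfer needed".
  if (strcmp(mem_info.name, onnxruntime::CPU) == 0) {
    return data_transfer;
  }

  if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) {  // assumed condition
#ifdef USE_CUDA
    auto* cuda_provider_info = TryGetProviderInfo_CUDA();
    if (cuda_provider_info != nullptr) {
      data_transfer = cuda_provider_info->CreateGPUDataTransfer();
    }
#endif
  }

  return data_transfer;
}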
57 changes: 2 additions & 55 deletions onnxruntime/test/lora/lora_test.cc
@@ -200,15 +200,7 @@ TEST(LoraAdapterTest, Load) {
 }
 
 #ifdef USE_CUDA
-TEST(LoraAdapterTest, VerifyCudaDeviceCopy) {
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    GTEST_SKIP() << "Skip This Test Due to this EP is null";
-  }
-#ifdef USE_DML
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    GTEST_FAIL() << "It should not run with DML EP";
-  }
-#endif
+TEST(LoraAdapterTest, VerifyDeviceCopy) {
   auto cpu_ep = DefaultCpuExecutionProvider();
   auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0];
   auto cuda_ep = DefaultCudaExecutionProvider();
@@ -230,52 +222,7 @@ TEST(LoraAdapterTest, VerifyCudaDeviceCopy) {
     ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size());
 
     Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator);
-    ASSERT_TRUE(cuda_transfer->CanCopy(tensor_device.Location().device,
-                                       copy.Location().device));
-    ASSERT_STATUS_OK(cuda_transfer->CopyTensor(tensor_device, copy));
-
-    auto expected_span = tensor_cpu.DataAsSpan<float>();
-    auto copy_span = copy.DataAsSpan<float>();
-
-    ASSERT_EQ(expected_span, copy_span);
-  }
-}
-#endif
-
-#ifdef USE_DML
-TEST(LoraAdapterTest, VerifyDmlDeviceCopy) {
-  // NO_DML_TEST is set, DML test is skipped
-  if (DefaultDmlExecutionProvider() == nullptr) {
-    GTEST_SKIP() << "Skip This Test Due to this EP is null";
-  }
-
-#ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    GTEST_FAIL() << "It should not run with CUDA EP";
-  }
-#endif
-
-  auto cpu_ep = DefaultCpuExecutionProvider();
-  auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0];
-
-  auto dml_allocator = DefaultDmlExecutionProvider()->CreatePreferredAllocators()[0];
-  auto dml_transfer = DefaultDmlExecutionProvider()->GetDataTransfer();
-
-  auto test_params = GenerateTestParameters<float>()();
-  lora::LoraAdapter adapter(std::move(dml_allocator));
-  adapter.Load(std::move(test_params));
-
-  auto [begin, end] = adapter.GetParamIterators();
-  for (; begin != end; ++begin) {
-    const auto& [_, param] = *begin;
-    const auto& tensor_device = param.GetDeviceOrMapped().Get<Tensor>();
-    ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::DML));
-
-    const auto& tensor_cpu = param.GetMapped().Get<Tensor>();
-    ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size());
-
-    Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator);
-    ASSERT_TRUE(dml_transfer->CanCopy(tensor_device.Location().device,
+    ASSERT_TRUE(gpu_transfer->CanCopy(tensor_device.Location().device,
                                       copy.Location().device));
     ASSERT_STATUS_OK(gpu_transfer->CopyTensor(tensor_device, copy));
 
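Pieced together from the context and '+' lines above, the surviving CUDA-only test likely reads as follows after the revert. The cuda_allocator and gpu_transfer setup lines are assumptions mirroring the deleted DML test; only the gpu_transfer assertions and the lines shown as context are confirmed by the diff.

#ifdef USE_CUDA
TEST(LoraAdapterTest, VerifyDeviceCopy) {
  auto cpu_ep = DefaultCpuExecutionProvider();
  auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0];
  auto cuda_ep = DefaultCudaExecutionProvider();
  // Assumed setup, mirroring the deleted DML test above.
  auto cuda_allocator = cuda_ep->CreatePreferredAllocators()[0];
  auto gpu_transfer = cuda_ep->GetDataTransfer();

  // Load generated test parameters into a device-backed adapter.
  auto test_params = GenerateTestParameters<float>()();
  lora::LoraAdapter adapter(std::move(cuda_allocator));
  adapter.Load(std::move(test_params));

  auto [begin, end] = adapter.GetParamIterators();
  for (; begin != end; ++begin) {
    const auto& [_, param] = *begin;
    const auto& tensor_device = param.GetDeviceOrMapped().Get<Tensor>();
    const auto& tensor_cpu = param.GetMapped().Get<Tensor>();
    ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size());

    // Round-trip: copy the device tensor back to CPU and compare element-wise.
    Tensor copy(tensor_cpu.DataType(), tensor_cpu.Shape(), cpu_allocator);
    ASSERT_TRUE(gpu_transfer->CanCopy(tensor_device.Location().device,
                                      copy.Location().device));
    ASSERT_STATUS_OK(gpu_transfer->CopyTensor(tensor_device, copy));

    auto expected_span = tensor_cpu.DataAsSpan<float>();
    auto copy_span = copy.DataAsSpan<float>();
    ASSERT_EQ(expected_span, copy_span);
  }
}
#endif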
