From 1d6a224b3901000d1b17cb8b9f1edb9a8dd639a4 Mon Sep 17 00:00:00 2001 From: Ningxin Hu Date: Wed, 17 May 2023 16:52:15 +0800 Subject: [PATCH] Fix using incorrect index for output buffer bindings When binding an output buffer for graph execution, the index of DML_BUFFER_BINDING should keep aligned with DML_OUTPUT_GRAPH_EDGE_DESC.GraphOutputIndex when compile the graph. --- .../ml/webnn/dml/graph_desc_builder.cc | 7 ++-- .../browser/ml/webnn/dml/graph_desc_builder.h | 12 ++++-- .../browser/ml/webnn/dml/graph_dml_impl.cc | 38 +++++++++---------- .../browser/ml/webnn/dml/readback_resource.cc | 9 +++-- .../browser/ml/webnn/dml/readback_resource.h | 3 +- 5 files changed, 38 insertions(+), 31 deletions(-) diff --git a/content/browser/ml/webnn/dml/graph_desc_builder.cc b/content/browser/ml/webnn/dml/graph_desc_builder.cc index 35c9f8160b95bd..dc8a83f826dc5d 100644 --- a/content/browser/ml/webnn/dml/graph_desc_builder.cc +++ b/content/browser/ml/webnn/dml/graph_desc_builder.cc @@ -102,8 +102,9 @@ void GraphDescBuilder::AddOutputEdge(NodeOutput* node_output, output_edge.GraphOutputIndex = output_index; graph_desc_.output_edges.push_back(output_edge); - named_outputs_[name] = - node_output->GetTensorDesc().GetTotalTensorSizeInBytes(); + named_outputs_[name] = { + .index = output_index, + .byte_length = node_output->GetTensorDesc().GetTotalTensorSizeInBytes()}; } ComPtr GraphDescBuilder::Compile( @@ -171,7 +172,7 @@ std::vector& GraphDescBuilder::GetInputNodes() { return input_nodes_; } -std::map& GraphDescBuilder::GetNamedOutputs() { +std::map& GraphDescBuilder::GetNamedOutputs() { return named_outputs_; } diff --git a/content/browser/ml/webnn/dml/graph_desc_builder.h b/content/browser/ml/webnn/dml/graph_desc_builder.h index a29f7ccf9567f9..2ef4aa0639c347 100644 --- a/content/browser/ml/webnn/dml/graph_desc_builder.h +++ b/content/browser/ml/webnn/dml/graph_desc_builder.h @@ -15,6 +15,11 @@ namespace content::webnn { +struct OutputInfo { + size_t index; + size_t byte_length; +}; + class GraphDescBuilder final { public: explicit GraphDescBuilder(ComPtr device); @@ -31,7 +36,7 @@ class GraphDescBuilder final { ComPtr Compile(DML_EXECUTION_FLAGS flags); std::vector& GetInputNodes(); - std::map& GetNamedOutputs(); + std::map& GetNamedOutputs(); private: struct GraphDesc { @@ -46,12 +51,13 @@ class GraphDescBuilder final { // The inputs node include inputs for execution and constant for // initialization because Both of them are inputs for DirectML Graph. + // The input node index is same as the offset in this vector. std::vector input_nodes_; // The operator nodes hold a reference of IDMLOperator to be used for // GraphDesc.nodes std::vector operator_nodes_; - // The output name and byte length mapping. - std::map named_outputs_; + // The output name to output index and byte length mapping. + std::map named_outputs_; GraphDesc graph_desc_; ComPtr device_; }; diff --git a/content/browser/ml/webnn/dml/graph_dml_impl.cc b/content/browser/ml/webnn/dml/graph_dml_impl.cc index d29db9e06a4146..497c2505c5ea50 100644 --- a/content/browser/ml/webnn/dml/graph_dml_impl.cc +++ b/content/browser/ml/webnn/dml/graph_dml_impl.cc @@ -1886,26 +1886,25 @@ void GraphDMLImpl::Compute(NamedResourcesPtr named_inputs, outputs_resource = execution_resources->Allocate( ResourceType::kOutput, outputs_resource_size, this); } - auto& output_length_map = graph_desc_builder_->GetNamedOutputs(); - std::vector output_binding_desc(output_length_map.size()); + auto& output_info_map = graph_desc_builder_->GetNamedOutputs(); + std::vector output_binding_desc(output_info_map.size()); // The sort of the outputs from Graph Compute is different from the // outputs from Graph Build, so the offset need to be found the correct output // with name to read back from GPU buffer. base::flat_map output_buffer_binding; // Reseve the map capacity to avoid reallocation. - output_buffer_binding.reserve(output_length_map.size()); + output_buffer_binding.reserve(output_info_map.size()); uint64_t aligned_offset = 0; - size_t i = 0; - for (auto& [name, byte_length] : output_length_map) { + for (auto& [name, output_info] : output_info_map) { DML_BUFFER_BINDING buffer_binding; buffer_binding.Buffer = outputs_resource; buffer_binding.Offset = aligned_offset; - buffer_binding.SizeInBytes = byte_length; + buffer_binding.SizeInBytes = output_info.byte_length; output_buffer_binding[name] = buffer_binding; - output_binding_desc[i] = {DML_BINDING_TYPE_BUFFER, - &output_buffer_binding[name]}; - aligned_offset += Align(byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - ++i; + output_binding_desc[output_info.index] = {DML_BINDING_TYPE_BUFFER, + &output_buffer_binding[name]}; + aligned_offset += + Align(output_info.byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); } execution_context_->ExecuteGraph(this, mCompiledOperator.Get(), @@ -1962,26 +1961,25 @@ bool GraphDMLImpl::Compute(NamedResourcesPtr named_inputs, outputs_resource = execution_resources->Allocate( ResourceType::kOutput, outputs_resource_size, this); } - auto& output_length_map = graph_desc_builder_->GetNamedOutputs(); - std::vector output_binding_desc(output_length_map.size()); + auto& output_info_map = graph_desc_builder_->GetNamedOutputs(); + std::vector output_binding_desc(output_info_map.size()); // The sort of the outputs from Graph Compute is different from the // outputs from Graph Build, so the offset need to be found the correct output // with name to read back from GPU buffer. base::flat_map output_buffer_binding; // Reseve the map capacity to avoid reallocation. - output_buffer_binding.reserve(output_length_map.size()); + output_buffer_binding.reserve(output_info_map.size()); uint64_t aligned_offset = 0; - size_t i = 0; - for (auto& [name, byte_length] : output_length_map) { + for (auto& [name, output_info] : output_info_map) { DML_BUFFER_BINDING buffer_binding; buffer_binding.Buffer = outputs_resource; buffer_binding.Offset = aligned_offset; - buffer_binding.SizeInBytes = byte_length; + buffer_binding.SizeInBytes = output_info.byte_length; output_buffer_binding[name] = buffer_binding; - output_binding_desc[i] = {DML_BINDING_TYPE_BUFFER, - &output_buffer_binding[name]}; - aligned_offset += Align(byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); - ++i; + output_binding_desc[output_info.index] = {DML_BINDING_TYPE_BUFFER, + &output_buffer_binding[name]}; + aligned_offset += + Align(output_info.byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); } execution_context_->ExecuteGraph(this, mCompiledOperator.Get(), diff --git a/content/browser/ml/webnn/dml/readback_resource.cc b/content/browser/ml/webnn/dml/readback_resource.cc index 99ea55214fbb48..af20235432e521 100644 --- a/content/browser/ml/webnn/dml/readback_resource.cc +++ b/content/browser/ml/webnn/dml/readback_resource.cc @@ -18,16 +18,17 @@ ReadbackResource::ReadbackResource(ExecutionContext* execution_context) ReadbackResource::~ReadbackResource() = default; HRESULT ReadbackResource::InitializeResource( - std::map& named_outputs) { + std::map& named_outputs) { uint64_t aligned_offset = 0; - for (auto& [name, byte_length] : named_outputs) { + for (auto& [name, output_info] : named_outputs) { MemoryInfo memory_info = {}; memory_info.byte_offset = aligned_offset; - memory_info.byte_length = byte_length; + memory_info.byte_length = output_info.byte_length; outputs_info_map_[name] = memory_info; // Only offset need to be algnement, the byte length keep original value. - aligned_offset += Align(byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); + aligned_offset += + Align(output_info.byte_length, DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT); } outputs_resource_size_ = aligned_offset; outputs_shm_region_ = diff --git a/content/browser/ml/webnn/dml/readback_resource.h b/content/browser/ml/webnn/dml/readback_resource.h index 9fdc73ab72bb9b..6cb8f6074e311e 100644 --- a/content/browser/ml/webnn/dml/readback_resource.h +++ b/content/browser/ml/webnn/dml/readback_resource.h @@ -11,6 +11,7 @@ #include "DirectML.h" #include "components/ml/mojom/webnn_graph.mojom.h" #include "content/browser/ml/webnn/dml/gpgmm_d3d12.h" +#include "content/browser/ml/webnn/dml/graph_desc_builder.h" #include "content/browser/ml/webnn/dml/utils_dml.h" namespace content::webnn { @@ -25,7 +26,7 @@ class ReadbackResource final { explicit ReadbackResource(ExecutionContext* execution_context); ~ReadbackResource(); - HRESULT InitializeResource(std::map& named_outputs); + HRESULT InitializeResource(std::map& named_outputs); HRESULT ReadResourceFromGpu(NamedResourcesPtr& named_outputs, ID3D12Resource* src_resource); size_t GetOutputsResourceSize() const;