diff --git a/content/browser/ml/webnn/dml/command_recorder.cc b/content/browser/ml/webnn/dml/command_recorder.cc index 63e66f2e0eedc5..a54179317b1a18 100644 --- a/content/browser/ml/webnn/dml/command_recorder.cc +++ b/content/browser/ml/webnn/dml/command_recorder.cc @@ -4,6 +4,8 @@ #include "content/browser/ml/webnn/dml/command_recorder.h" +#include "base/trace_event/trace_event.h" +#include "base/trace_event/typed_macros.h" #include "content/browser/ml/webnn/dml/adapter_dml.h" #include "content/browser/ml/webnn/dml/execution_resources.h" @@ -65,6 +67,7 @@ HRESULT CommandRecorder::InitializeGraph( GraphDMLImpl* graph, IDMLCompiledOperator* compiled_operator, const DML_BINDING_DESC& input_array_binding) { + TRACE_EVENT0("gpu", "CommandRecorder::InitializeGraph"); // Reset the initializer to reference the compiled operator. IDMLCompiledOperator* ops[] = {compiled_operator}; HRESULT hr = operator_initializer_->Reset(ARRAYSIZE(ops), ops); @@ -164,6 +167,7 @@ HRESULT CommandRecorder::ExecuteGraph( IDMLCompiledOperator* compiled_operator, const std::vector<DML_BINDING_DESC>& input_bindings, const std::vector<DML_BINDING_DESC>& output_bindings) { + TRACE_EVENT0("gpu", "CommandRecorder::ExecuteGraph"); DCHECK(mBindingTable != nullptr); // Bind and execute the operator on the GPU. 
// Reset the binding table to bind for the operator we want to execute (it diff --git a/content/browser/ml/webnn/dml/graph_dml_impl.cc b/content/browser/ml/webnn/dml/graph_dml_impl.cc index 0c4a450424c8b8..d597471f59fcfc 100644 --- a/content/browser/ml/webnn/dml/graph_dml_impl.cc +++ b/content/browser/ml/webnn/dml/graph_dml_impl.cc @@ -4,9 +4,11 @@ #include "content/browser/ml/webnn/dml/graph_dml_impl.h" +#include "base/containers/span.h" #include "base/logging.h" #include "base/memory/ptr_util.h" -#include "base/containers/span.h" +#include "base/trace_event/trace_event.h" +#include "base/trace_event/typed_macros.h" #include "content/browser/ml/webnn/dml/execution_context.h" #include "content/browser/ml/webnn/dml/execution_resources.h" #include "content/browser/ml/webnn/dml/graph_dml_impl.h" @@ -1855,6 +1857,7 @@ bool GraphDMLImpl::Build(ModelInfoPtr model_info, BuildResult* out_result) { void GraphDMLImpl::Compute(NamedResourcesPtr named_inputs, ComputeCallback callback) { + TRACE_EVENT0("gpu", "GraphDMLImpl::Compute"); ExecutionResources* execution_resources = execution_context_->GetExecutionResources(); ID3D12Resource* inputs_resource = diff --git a/content/browser/ml/webnn/dml/readback_resource.cc b/content/browser/ml/webnn/dml/readback_resource.cc index 9760bdcff3151f..0d5aa1cf42ac3f 100644 --- a/content/browser/ml/webnn/dml/readback_resource.cc +++ b/content/browser/ml/webnn/dml/readback_resource.cc @@ -6,6 +6,8 @@ #include +#include "base/trace_event/trace_event.h" +#include "base/trace_event/typed_macros.h" #include "content/browser/ml/webnn/dml/execution_context.h" namespace content::webnn { @@ -42,6 +44,7 @@ HRESULT ReadbackResource::InitializeResource( // Readback inference result from GPU that is stored in named_outputs. HRESULT ReadbackResource::ReadResourceFromGpu(NamedResourcesPtr& named_outputs, ID3D12Resource* src_resource) { + TRACE_EVENT0("gpu", "ReadbackResource::ReadResourceFromGpu"); // Copy buffer from GPU resource to CPU data. 
execution_context_->CopyBufferRegion(readback_resource_->GetResource(), src_resource, outputs_resource_size_, diff --git a/content/browser/ml/webnn/dml/upload_resource.cc b/content/browser/ml/webnn/dml/upload_resource.cc index 00bd035bf28580..45d5c8f474c2f7 100644 --- a/content/browser/ml/webnn/dml/upload_resource.cc +++ b/content/browser/ml/webnn/dml/upload_resource.cc @@ -6,6 +6,8 @@ #include +#include "base/trace_event/trace_event.h" +#include "base/trace_event/typed_macros.h" #include "content/browser/ml/webnn/dml/execution_context.h" namespace content::webnn { @@ -60,6 +62,7 @@ UploadResource::~UploadResource() = default; // need to transition. HRESULT UploadResource::UploadConstants(ID3D12Resource* dst_resource, ConstantsInfoPtr& constants_info) { + TRACE_EVENT0("gpu", "UploadResource::UploadConstants"); base::ReadOnlySharedMemoryRegion& shared_memory_region = constants_info->shared_memory; size_t constants_byte_length = shared_memory_region.GetSize(); @@ -80,6 +83,7 @@ HRESULT UploadResource::UploadConstants(ID3D12Resource* dst_resource, HRESULT UploadResource::UploadInputs(ID3D12Resource* dst_resource, NamedResourcesPtr& named_inputs) { + TRACE_EVENT0("gpu", "UploadResource::UploadInputs"); base::ReadOnlySharedMemoryRegion& shared_memory_region = named_inputs->shared_memory; size_t inputs_byte_length = shared_memory_region.GetSize(); diff --git a/third_party/blink/renderer/modules/ml/webnn/mojo_graph.cc b/third_party/blink/renderer/modules/ml/webnn/mojo_graph.cc index 35d2f0c86ae3e5..20ffef7a682afd 100644 --- a/third_party/blink/renderer/modules/ml/webnn/mojo_graph.cc +++ b/third_party/blink/renderer/modules/ml/webnn/mojo_graph.cc @@ -4,6 +4,8 @@ #include "third_party/blink/renderer/modules/ml/webnn/mojo_graph.h" +#include "base/trace_event/trace_event.h" +#include "base/trace_event/typed_macros.h" #include "mojo/public/cpp/bindings/pending_remote.h" #include "third_party/blink/renderer/bindings/core/v8/script_promise_resolver.h" #include 
"third_party/blink/renderer/bindings/modules/v8/v8_ml_tensor.h" @@ -277,6 +279,7 @@ void MojoGraph::ComputeAsyncImpl(const MLNamedArrayBufferViews& inputs, void MojoGraph::ComputeSyncImpl(const MLNamedArrayBufferViews& inputs, const MLNamedArrayBufferViews& outputs, ExceptionState& exception_state) { + TRACE_EVENT0("blink", "MojoGraph::ComputeSyncImpl"); if (inputs.size() != input_resources_info_.size()) { exception_state.ThrowDOMException(DOMExceptionCode::kDataError, "The number of inputs is invalid."); @@ -284,24 +287,28 @@ void MojoGraph::ComputeSyncImpl(const MLNamedArrayBufferViews& inputs, } auto named_inputs = ml::webnn::mojom::blink::NamedResources::New(), named_outputs = ml::webnn::mojom::blink::NamedResources::New(); - for (const auto& input : inputs) { - String error_message; - auto* input_array_buffer_view = input.second.Get(); - if (input_array_buffer_view == nullptr) { - exception_state.ThrowDOMException(DOMExceptionCode::kDataError, - error_message); + { + TRACE_EVENT0("blink", "MojoGraph::ComputeSyncImpl::CopyInputs"); + for (const auto& input : inputs) { + String error_message; + auto* input_array_buffer_view = input.second.Get(); + if (input_array_buffer_view == nullptr) { + exception_state.ThrowDOMException(DOMExceptionCode::kDataError, + error_message); + } + const String& input_name = input.first; + auto memory_info = ml::webnn::mojom::blink::MemoryInfo::New(); + memory_info->byte_offset = inputs_byte_offset_.at(input_name); + memory_info->byte_length = + input_resources_info_.at(input_name).byte_length; + uint8_t* address = inputs_shm_region_.mapping.GetMemoryAs<uint8_t>() + + memory_info->byte_offset; + memcpy(address, input_array_buffer_view->BaseAddressMaybeShared(), + input_array_buffer_view->byteLength()); + named_inputs->resources.insert(input_name, std::move(memory_info)); } - const String& input_name = input.first; - auto memory_info = ml::webnn::mojom::blink::MemoryInfo::New(); - memory_info->byte_offset = inputs_byte_offset_.at(input_name); 
- memory_info->byte_length = input_resources_info_.at(input_name).byte_length; - uint8_t* address = inputs_shm_region_.mapping.GetMemoryAs<uint8_t>() + - memory_info->byte_offset; - memcpy(address, input_array_buffer_view->BaseAddressMaybeShared(), - input_array_buffer_view->byteLength()); - named_inputs->resources.insert(input_name, std::move(memory_info)); + named_inputs->shared_memory = inputs_shm_region_.region.Duplicate(); } - named_inputs->shared_memory = inputs_shm_region_.region.Duplicate(); ComputeResult result; if (!remote_graph_->Compute(std::move(named_inputs), &result, &named_outputs)) { @@ -309,29 +316,33 @@ void MojoGraph::ComputeSyncImpl(const MLNamedArrayBufferViews& inputs, "Failed to compute the graph."); return; }; - for (const auto& output : outputs) { - String error_message; - void* output_buffer_address = output.second->BaseAddressMaybeShared(); - if (output_buffer_address == nullptr) { - exception_state.ThrowDOMException(DOMExceptionCode::kOperationError, - error_message); - return; - } - auto iter = named_outputs->resources.find(output.first); - if (iter == named_outputs->resources.end()) { - exception_state.ThrowDOMException(DOMExceptionCode::kOperationError, - "Failed to get result for the output."); - return; + { + TRACE_EVENT0("blink", "MojoGraph::ComputeSyncImpl::CopyOutputs"); + for (const auto& output : outputs) { + String error_message; + void* output_buffer_address = output.second->BaseAddressMaybeShared(); + if (output_buffer_address == nullptr) { + exception_state.ThrowDOMException(DOMExceptionCode::kOperationError, + error_message); + return; + } + auto iter = named_outputs->resources.find(output.first); + if (iter == named_outputs->resources.end()) { + exception_state.ThrowDOMException( + DOMExceptionCode::kOperationError, + "Failed to get result for the output."); + return; + } + MemoryInfoPtr memory_info = std::move(iter->value); + base::ReadOnlySharedMemoryRegion& shared_memory_region = + named_outputs->shared_memory; + 
DCHECK(shared_memory_region.IsValid()); + size_t byte_length = base::checked_cast<size_t>(memory_info->byte_length); + base::ReadOnlySharedMemoryMapping shared_memory_mapping = + shared_memory_region.MapAt(memory_info->byte_offset, byte_length); + memcpy(output_buffer_address, + shared_memory_mapping.GetMemoryAs<uint8_t>(), byte_length); } - MemoryInfoPtr memory_info = std::move(iter->value); - base::ReadOnlySharedMemoryRegion& shared_memory_region = - named_outputs->shared_memory; - DCHECK(shared_memory_region.IsValid()); - size_t byte_length = base::checked_cast<size_t>(memory_info->byte_length); - base::ReadOnlySharedMemoryMapping shared_memory_mapping = - shared_memory_region.MapAt(memory_info->byte_offset, byte_length); - memcpy(output_buffer_address, shared_memory_mapping.GetMemoryAs<uint8_t>(), - byte_length); } }