From 52a8c1cae8cfd82f952dd27707f61b3b980bb843 Mon Sep 17 00:00:00 2001 From: Enrico Galli Date: Fri, 27 Sep 2024 17:24:21 -0700 Subject: [PATCH] [WebNN EP] Enable IO Bindings with MLTensor (#21301) ### Description Enables using the MLTensor to pass data between models. ### Motivation and Context Using MLTensor instead of ArrayBuffers reduces the number of copies between the CPU and devices as well as the renderer and GPU process in Chromium. --- .../onnxruntime/core/framework/allocator.h | 1 + js/common/lib/tensor-factory-impl.ts | 12 + js/common/lib/tensor-factory.ts | 46 +++ js/common/lib/tensor-impl.ts | 59 +++- js/common/lib/tensor-utils-impl.ts | 8 + js/common/lib/tensor.ts | 30 +- js/web/lib/wasm/jsep/backend-webnn.ts | 169 ++++++++++ js/web/lib/wasm/jsep/init.ts | 19 +- js/web/lib/wasm/jsep/webnn/tensor-manager.ts | 303 ++++++++++++++++++ js/web/lib/wasm/jsep/webnn/webnn.d.ts | 43 ++- js/web/lib/wasm/proxy-messages.ts | 10 +- js/web/lib/wasm/session-handler-inference.ts | 12 +- js/web/lib/wasm/wasm-common.ts | 18 +- js/web/lib/wasm/wasm-core-impl.ts | 62 +++- js/web/lib/wasm/wasm-types.ts | 87 ++++- js/web/script/test-runner-cli-args.ts | 6 +- js/web/script/test-runner-cli.ts | 2 +- js/web/test/test-runner.ts | 107 ++++++- js/web/test/test-types.ts | 6 +- onnxruntime/core/framework/allocator.cc | 3 +- onnxruntime/core/providers/webnn/allocator.cc | 41 +++ onnxruntime/core/providers/webnn/allocator.h | 32 ++ .../core/providers/webnn/builders/helper.cc | 18 ++ .../core/providers/webnn/builders/helper.h | 4 + .../core/providers/webnn/builders/model.cc | 53 ++- .../core/providers/webnn/builders/model.h | 10 +- .../providers/webnn/builders/model_builder.cc | 2 +- .../core/providers/webnn/data_transfer.cc | 44 +++ .../core/providers/webnn/data_transfer.h | 21 ++ .../webnn/webnn_execution_provider.cc | 71 +++- .../webnn/webnn_execution_provider.h | 2 + onnxruntime/wasm/api.cc | 26 +- onnxruntime/wasm/pre-jsep.js | 33 ++ 33 files changed, 1287 insertions(+), 73 deletions(-) create mode 100644 js/web/lib/wasm/jsep/backend-webnn.ts create mode 100644 js/web/lib/wasm/jsep/webnn/tensor-manager.ts create mode 100644 onnxruntime/core/providers/webnn/allocator.cc create mode 100644 onnxruntime/core/providers/webnn/allocator.h create mode 100644 onnxruntime/core/providers/webnn/data_transfer.cc create mode 100644 onnxruntime/core/providers/webnn/data_transfer.h diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index abab118efd04f..57b332ce65b93 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -53,6 +53,7 @@ constexpr const char* OpenVINO_GPU = "OpenVINO_GPU"; constexpr const char* OpenVINO_RT = "OpenVINO_RT"; constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU"; constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer"; +constexpr const char* WEBNN_TENSOR = "WebNN_Tensor"; constexpr size_t kAllocAlignment = 256; diff --git a/js/common/lib/tensor-factory-impl.ts b/js/common/lib/tensor-factory-impl.ts index 5eb7ba4793b32..cbc0270091818 100644 --- a/js/common/lib/tensor-factory-impl.ts +++ b/js/common/lib/tensor-factory-impl.ts @@ -11,6 +11,7 @@ import { TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, + TensorFromMLTensorOptions, TensorFromTextureOptions, TensorFromUrlOptions, } from './tensor-factory.js'; @@ -310,6 +311,17 @@ export const tensorFromGpuBuffer = ( + mlTensor: TensorInterface.MLTensorType, + options: 
TensorFromMLTensorOptions, +): Tensor => { + const { dataType, dims, download, dispose } = options; + return new Tensor({ location: 'ml-tensor', type: dataType ?? 'float32', mlTensor, dims, download, dispose }); +}; + /** * implementation of Tensor.fromPinnedBuffer(). */ diff --git a/js/common/lib/tensor-factory.ts b/js/common/lib/tensor-factory.ts index 7938b4a4eb927..f66684112623e 100644 --- a/js/common/lib/tensor-factory.ts +++ b/js/common/lib/tensor-factory.ts @@ -86,6 +86,20 @@ export interface GpuBufferConstructorParameters + extends CommonConstructorParameters, + GpuResourceConstructorParameters { + /** + * Specify the location of the data to be 'ml-tensor'. + */ + readonly location: 'ml-tensor'; + + /** + * Specify the WebNN MLTensor that holds the tensor data. + */ + readonly mlTensor: Tensor.MLTensorType; +} + // #endregion // the following region contains type definitions of each individual options. @@ -219,6 +233,15 @@ export interface TensorFromGpuBufferOptions dataType?: T; } +export interface TensorFromMLTensorOptions + extends Pick, + GpuResourceConstructorParameters { + /** + * Describes the data type of the tensor. + */ + dataType?: T; +} + // #endregion /** @@ -336,6 +359,29 @@ export interface TensorFactory { options: TensorFromGpuBufferOptions, ): TypedTensor; + /** + * create a tensor from a WebNN MLTensor + * + * @param tensor - the MLTensor object to create tensor from + * @param options - An optional object representing options for creating tensor from a WebNN MLTensor. + * + * The options include following properties: + * - `dataType`: the data type of the tensor. If omitted, assume 'float32'. + * - `dims`: the dimension of the tensor. Required. + * - `download`: an optional function to download the tensor data from the MLTensor to CPU. If omitted, the MLTensor + * data will not be able to download. Usually, this is provided by the WebNN backend for the inference outputs. + * Users don't need to provide this function. + * - `dispose`: an optional function to dispose the tensor data on the WebNN MLTensor. If omitted, the MLTensor will + * not be disposed. Usually, this is provided by the WebNN backend for the inference outputs. Users don't need to + * provide this function. + * + * @returns a tensor object + */ + fromMLTensor( + tensor: Tensor.MLTensorType, + options: TensorFromMLTensorOptions, + ): TypedTensor; + /** * create a tensor from a pre-allocated buffer. The buffer will be used as a pinned buffer. 
* diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts index 342f5e3a467eb..c0e1582c17de5 100644 --- a/js/common/lib/tensor-impl.ts +++ b/js/common/lib/tensor-impl.ts @@ -6,16 +6,19 @@ import { TensorToDataUrlOptions, TensorToImageDataOptions } from './tensor-conve import { tensorFromGpuBuffer, tensorFromImage, + tensorFromMLTensor, tensorFromPinnedBuffer, tensorFromTexture, } from './tensor-factory-impl.js'; import { CpuPinnedConstructorParameters, GpuBufferConstructorParameters, + MLTensorConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, + TensorFromMLTensorOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters, @@ -37,6 +40,7 @@ type TensorDataType = TensorInterface.DataType; type TensorDataLocation = TensorInterface.DataLocation; type TensorTextureType = TensorInterface.TextureType; type TensorGpuBufferType = TensorInterface.GpuBufferType; +type TensorMLTensorType = TensorInterface.MLTensorType; /** * the implementation of Tensor interface. @@ -86,6 +90,15 @@ export class Tensor implements TensorInterface { */ constructor(params: GpuBufferConstructorParameters); + /** + * Construct a new tensor object from the WebNN MLTensor with the given type and dims. + * + * Tensor's location will be set to 'ml-tensor'. + * + * @param params - Specify the parameters to construct the tensor. + */ + constructor(params: MLTensorConstructorParameters); + /** * implementation. */ @@ -98,7 +111,8 @@ export class Tensor implements TensorInterface { | readonly boolean[] | CpuPinnedConstructorParameters | TextureConstructorParameters - | GpuBufferConstructorParameters, + | GpuBufferConstructorParameters + | MLTensorConstructorParameters, arg1?: TensorDataType | Uint8ClampedArray | readonly number[] | readonly string[] | readonly boolean[], arg2?: readonly number[], ) { @@ -155,6 +169,25 @@ export class Tensor implements TensorInterface { this.disposer = arg0.dispose; break; } + case 'ml-tensor': { + if ( + type !== 'float32' && + type !== 'float16' && + type !== 'int32' && + type !== 'int64' && + type !== 'uint32' && + type !== 'uint64' && + type !== 'int8' && + type !== 'uint8' && + type !== 'bool' + ) { + throw new TypeError(`unsupported type "${type}" to create tensor from MLTensor`); + } + this.mlTensorData = arg0.mlTensor; + this.downloader = arg0.download; + this.disposer = arg0.dispose; + break; + } default: throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`); } @@ -325,6 +358,13 @@ export class Tensor implements TensorInterface { return tensorFromGpuBuffer(gpuBuffer, options); } + static fromMLTensor( + mlTensor: TensorMLTensorType, + options: TensorFromMLTensorOptions, + ): TensorInterface { + return tensorFromMLTensor(mlTensor, options); + } + static fromPinnedBuffer( type: T, buffer: TensorInterface.DataTypeMap[T], @@ -373,6 +413,11 @@ export class Tensor implements TensorInterface { */ private gpuBufferData?: TensorGpuBufferType; + /** + * stores the underlying WebNN MLTensor when location is 'ml-tensor'. otherwise empty. + */ + private mlTensorData?: TensorMLTensorType; + /** * stores an optional downloader function to download data from GPU to CPU. 
*/ @@ -420,6 +465,14 @@ export class Tensor implements TensorInterface { } return this.gpuBufferData; } + + get mlTensor(): TensorMLTensorType { + this.ensureValid(); + if (!this.mlTensorData) { + throw new Error('The data is not stored as a WebNN MLTensor.'); + } + return this.mlTensorData; + } // #endregion // #region methods @@ -431,7 +484,8 @@ export class Tensor implements TensorInterface { case 'cpu-pinned': return this.data; case 'texture': - case 'gpu-buffer': { + case 'gpu-buffer': + case 'ml-tensor': { if (!this.downloader) { throw new Error('The current tensor is not created with a specified data downloader.'); } @@ -472,6 +526,7 @@ export class Tensor implements TensorInterface { this.cpuData = undefined; this.gpuTextureData = undefined; this.gpuBufferData = undefined; + this.mlTensorData = undefined; this.downloader = undefined; this.isDownloading = undefined; diff --git a/js/common/lib/tensor-utils-impl.ts b/js/common/lib/tensor-utils-impl.ts index 9c633cd95fac3..97b1735e6eac5 100644 --- a/js/common/lib/tensor-utils-impl.ts +++ b/js/common/lib/tensor-utils-impl.ts @@ -4,6 +4,7 @@ import { CpuPinnedConstructorParameters, GpuBufferConstructorParameters, + MLTensorConstructorParameters, TextureConstructorParameters, } from './tensor-factory.js'; import { Tensor } from './tensor-impl.js'; @@ -56,6 +57,13 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor = type: tensor.type as GpuBufferConstructorParameters['type'], dims, }); + case 'ml-tensor': + return new Tensor({ + location: 'ml-tensor', + mlTensor: tensor.mlTensor, + type: tensor.type as MLTensorConstructorParameters['type'], + dims, + }); default: throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`); } diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts index 8a1197994393b..17e2f4d37c91f 100644 --- a/js/common/lib/tensor.ts +++ b/js/common/lib/tensor.ts @@ -42,6 +42,13 @@ interface TypedTensorBase { */ readonly gpuBuffer: Tensor.GpuBufferType; + /** + * Get the WebNN MLTensor that holds the tensor data. + * + * If the data is not in a WebNN MLTensor, throw error. + */ + readonly mlTensor: Tensor.MLTensorType; + /** * Get the buffer data of the tensor. * @@ -136,15 +143,36 @@ export declare namespace Tensor { */ export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' }; + /** + * type alias for WebNN MLTensor + * + * The specification for WebNN's MLTensor is currently in flux. + */ + export type MLTensorType = unknown; + /** * supported data types for constructing a tensor from a WebGPU buffer */ export type GpuBufferDataTypes = 'float32' | 'float16' | 'int32' | 'int64' | 'uint32' | 'uint8' | 'bool'; + /** + * supported data types for constructing a tensor from a WebNN MLTensor + */ + export type MLTensorDataTypes = + | 'float32' + | 'float16' + | 'int8' + | 'uint8' + | 'int32' + | 'uint32' + | 'int64' + | 'uint64' + | 'bool'; + /** * represent where the tensor data is stored */ - export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer'; + export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer' | 'ml-tensor'; /** * represent the data type of a tensor diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts new file mode 100644 index 0000000000000..685f3dc019461 --- /dev/null +++ b/js/web/lib/wasm/jsep/backend-webnn.ts @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// WebNN API currently does not have a TypeScript definition file. This file is a workaround with types generated from +// WebNN API specification. +// https://github.com/webmachinelearning/webnn/issues/677 +/// + +import { Env, Tensor } from 'onnxruntime-common'; + +import { DataType } from '../wasm-common'; +import { getInstance } from '../wasm-factory'; + +import { createView } from './tensor-view'; +import { TensorId, createTensorManager } from './webnn/tensor-manager'; +import { configureLogger, LOG_DEBUG } from './log'; + +/* + * TensorProto::data_type to WebNN OperandType mapping. + */ +const onnxDataTypeToWebnnDataType = new Map([ + [DataType.float, 'float32'], + [DataType.float16, 'float16'], + [DataType.int32, 'int32'], + [DataType.uint32, 'uint32'], + [DataType.int64, 'int64'], + [DataType.uint64, 'uint64'], + [DataType.int8, 'int8'], + [DataType.uint8, 'uint8'], + [DataType.bool, 'uint8'], +]); + +/** + * WebNN backend implementation. This class is used to keep track of the MLTensors created by the backend and keep track + * of the current MLContext being used by the sessions. + */ +export class WebNNBackend { + /** + * Tensor managers for each session. + */ + private tensorManager = createTensorManager(this); + /** + * Maps from session id to MLContexts. + */ + private mlContextBySessionId = new Map(); + /** + * Maps from MLContext to session ids. + */ + private sessionIdsByMLContext = new Map>(); + /** + * Current session id. + */ + private activeSessionId?: number; + + constructor(env: Env) { + configureLogger(env.logLevel!, !!env.debug); + } + + public get currentSessionId(): number { + if (this.activeSessionId === undefined) { + throw new Error('No active session'); + } + return this.activeSessionId; + } + + public onRunStart(sessionId: number): void { + this.activeSessionId = sessionId; + } + + public get currentContext(): MLContext { + const mlContext = this.getMLContext(this.currentSessionId); + if (!mlContext) { + throw new Error(`No MLContext found for session ${this.currentSessionId}`); + } + return mlContext; + } + + public registerMLContext(sessionId: number, mlContext: MLContext): void { + this.mlContextBySessionId.set(sessionId, mlContext); + let sessionIds = this.sessionIdsByMLContext.get(mlContext); + if (!sessionIds) { + sessionIds = new Set(); + this.sessionIdsByMLContext.set(mlContext, sessionIds); + } + sessionIds.add(sessionId); + } + + public onReleaseSession(sessionId: number): void { + const mlContext = this.mlContextBySessionId.get(sessionId)!; + if (!mlContext) { + // Current session is not a WebNN session. 
+ return; + } + this.mlContextBySessionId.delete(sessionId); + const sessionIds = this.sessionIdsByMLContext.get(mlContext)!; + sessionIds.delete(sessionId); + if (sessionIds.size === 0) { + this.sessionIdsByMLContext.delete(mlContext); + this.tensorManager.releaseTensorsForContext(mlContext); + } + } + + public getMLContext(sessionId: number): MLContext | undefined { + return this.mlContextBySessionId.get(sessionId); + } + + public reserveTensorId(): TensorId { + return this.tensorManager.reserveTensorId(); + } + + public releaseTensorId(tensorId: TensorId): void { + LOG_DEBUG('verbose', () => `[WebNN] releaseTensorId {tensorId: ${tensorId}}`); + this.tensorManager.releaseTensorId(tensorId); + } + + public async ensureTensor( + tensorId: TensorId, + onnxDataType: DataType, + dimensions: number[], + copyOld: boolean, + ): Promise { + const webnnDataType = onnxDataTypeToWebnnDataType.get(onnxDataType); + if (!webnnDataType) { + throw new Error(`Unsupported ONNX data type: ${onnxDataType}`); + } + return this.tensorManager.ensureTensor(tensorId, webnnDataType, dimensions, copyOld); + } + + public uploadTensor(tensorId: TensorId, data: Uint8Array): void { + const wasm = getInstance(); + if (!wasm.shouldTransferToMLTensor) { + throw new Error('Trying to upload to a MLTensor while shouldTransferToMLTensor is false'); + } + LOG_DEBUG('verbose', () => `[WebNN] uploadTensor {tensorId: ${tensorId}, data: ${data.byteLength}}`); + this.tensorManager.upload(tensorId, data); + } + + public async downloadTensor(tensorId: TensorId, dstBuffer: ArrayBufferView | ArrayBuffer): Promise { + return this.tensorManager.download(tensorId, dstBuffer); + } + + public createMLTensorDownloader(tensorId: TensorId, type: Tensor.MLTensorDataTypes): () => Promise { + return async () => { + const data = await this.tensorManager.download(tensorId); + return createView(data, type); + }; + } + + public registerMLTensor(tensor: MLTensor, onnxDataType: DataType, dimensions: number[]): TensorId { + const webnnDataType = onnxDataTypeToWebnnDataType.get(onnxDataType); + if (!webnnDataType) { + throw new Error(`Unsupported ONNX data type: ${onnxDataType}`); + } + + const id = this.tensorManager.registerTensor(this.currentContext, tensor, webnnDataType, dimensions); + LOG_DEBUG( + 'verbose', + () => + `[WebNN] registerMLTensor {tensor: ${tensor}, dataType: ${webnnDataType}, dimensions: ${ + dimensions + }} -> {tensorId: ${id}}`, + ); + return id; + } + + public flush(): void { + // Unlike the WebGPU backend, the WebNN backend does not need to flush any pending operations. 
+ } +} diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts index 2f0e5da2b3f27..7bce5ff9390e8 100644 --- a/js/web/lib/wasm/jsep/init.ts +++ b/js/web/lib/wasm/jsep/init.ts @@ -12,6 +12,7 @@ import { LOG_DEBUG } from './log'; import { TensorView } from './tensor-view'; import { ShapeUtil } from './util'; import { AdapterInfo, ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo } from './webgpu/types'; +import { WebNNBackend } from './backend-webnn'; /* eslint-disable no-bitwise */ @@ -266,6 +267,22 @@ export const init = async ( () => backend.replay(), ]); } else { - jsepInit('webnn'); + const backend = new WebNNBackend(env); + jsepInit('webnn', [ + backend, + // jsepReserveTensorId + () => backend.reserveTensorId(), + // jsepReleaseTensorId, + (tensorId: number) => backend.releaseTensorId(tensorId), + // jsepEnsureTensor + async (tensorId: number, onnxDataType: number, shape: number[], copyOld) => + backend.ensureTensor(tensorId, onnxDataType, shape, copyOld), + // jsepUploadTensor + (tensorId: number, data: Uint8Array) => { + backend.uploadTensor(tensorId, data); + }, + // jsepDownloadTensor + async (tensorId: number, dstBuffer: ArrayBufferView | ArrayBuffer) => backend.downloadTensor(tensorId, dstBuffer), + ]); } }; diff --git a/js/web/lib/wasm/jsep/webnn/tensor-manager.ts b/js/web/lib/wasm/jsep/webnn/tensor-manager.ts new file mode 100644 index 0000000000000..9475de019ed1d --- /dev/null +++ b/js/web/lib/wasm/jsep/webnn/tensor-manager.ts @@ -0,0 +1,303 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import { WebNNBackend } from '../backend-webnn'; +import { LOG_DEBUG } from '../log'; + +// WebNN API currently does not have a TypeScript definition file. This file is a workaround with types generated from +// WebNN API specification. +// https://github.com/webmachinelearning/webnn/issues/677 +/// + +export type TensorId = number; + +/** + * Manages TensorId to MLTensor mapping. + */ +export interface TensorManager { + /** + * Reserve a new TensorId. + */ + reserveTensorId(): TensorId; + /** + * Release a TensorId. + */ + releaseTensorId(tensorId: TensorId): void; + /** + * Ensure a MLTensor is created for the TensorId. + */ + ensureTensor( + tensorId: TensorId, + dataType: MLOperandDataType, + shape: readonly number[], + copyOld: boolean, + ): Promise; + /** + * Upload data to a MLTensor. + */ + upload(tensorId: TensorId, data: Uint8Array): void; + /** + * Download data from a MLTensor. + */ + download(tensorId: TensorId): Promise; + download(tensorId: TensorId, dstTensor: ArrayBufferView | ArrayBuffer): Promise; + /** + * Release all tensors for a MLContext. + */ + releaseTensorsForContext(mlContext: MLContext): void; + /** + * Register an externally created MLTensor with a given MLContext and return a TensorId. + */ + registerTensor(mlContext: MLContext, mlTensor: MLTensor, dataType: MLOperandDataType, shape: number[]): TensorId; +} + +let tensorGuid = 1; +const createNewTensorId = (): TensorId => tensorGuid++; + +export type MLTensorEntry = [MLTensor, MLOperandDataType, readonly number[]]; + +/** + * TensorTracker tracks the MLTensor and pending upload data. + * + * We need to track the MLTensor and pending upload data because we delay the creation of MLTensor until + * we know the data type and shape. This is because future implementations of WebNN will only support creating + * MLTensors with dataTypes and shape. 
+ */ +class TensorTracker { + private tensorEntry?: MLTensorEntry; + private activeUpload?: Uint8Array; + private tensorCache: MLTensorEntry[]; + + constructor( + private mlContext?: MLContext, + tensorEntry?: MLTensorEntry, + ) { + this.tensorEntry = tensorEntry; + this.tensorCache = tensorEntry ? [tensorEntry] : []; + } + + public get tensor(): MLTensor | undefined { + return this.tensorEntry?.[0]; + } + + public get context(): MLContext { + if (!this.mlContext) { + throw new Error('MLContext has not been set.'); + } + return this.mlContext; + } + + public set context(mlContext: MLContext) { + if (this.mlContext && this.mlContext !== mlContext) { + throw new Error('MLTensor in use in a different MLContext.'); + } + this.mlContext = mlContext; + } + + public destroy(): void { + for (const [mlTensor] of this.tensorCache) { + mlTensor.destroy(); + } + this.tensorCache = []; + this.tensorEntry = undefined; + } + + public trySelectTensor(context: MLContext, tryMLTensor: MLTensor): boolean { + for (const [mlTensor, dataType, shape] of this.tensorCache) { + if (tryMLTensor === mlTensor) { + if (this.context !== context) { + throw new Error('MLTensor cannot be registered with a different MLContext.'); + } + this.tensorEntry = [mlTensor, dataType, shape]; + return true; + } + } + return false; + } + + public async ensureTensor( + dataType: MLOperandDataType, + shape: readonly number[], + copyOld: boolean, + ): Promise { + if (this.tensorEntry) { + const [mlTensor, existingDataType, existingShape] = this.tensorEntry; + if (existingDataType === dataType && existingShape.every((v, i) => v === shape[i])) { + return mlTensor; + } + } + + for (const [mlTensor, existingDataType, existingShape] of this.tensorCache) { + if (existingDataType === dataType && existingShape.every((v, i) => v === shape[i])) { + if (copyOld && this.tensorEntry) { + // WebNN does not support copyTensorToTensor, so we need to read and write the tensors. + LOG_DEBUG( + 'verbose', + () => `[WebNN] Slowdown may occur, having to copy existing tensor {dataType: ${dataType}, shape: ${shape}}`, + ); + const data = await this.context.readTensor(this.tensorEntry[0]); + this.context.writeTensor(mlTensor, data); + } + this.tensorEntry = [mlTensor, existingDataType, existingShape]; + return mlTensor; + } + } + LOG_DEBUG('verbose', () => `[WebNN] MLContext.createTensor {dataType: ${dataType}, shape: ${shape}}`); + // eslint-disable-next-line no-bitwise + const usage = MLTensorUsage.READ | MLTensorUsage.WRITE; + const tensor = await this.context.createTensor({ + dataType, + shape, + // Assign both shape and dimensions while transitioning to new API. 
+ dimensions: shape, + usage, + }); + this.tensorEntry = [tensor, dataType, shape]; + this.tensorCache.push(this.tensorEntry); + + if (this.activeUpload) { + this.mlContext?.writeTensor(tensor, this.activeUpload); + this.activeUpload = undefined; + } + + return tensor; + } + + public upload(data: Uint8Array): void { + if (!this.tensorEntry) { + this.activeUpload = new Uint8Array(data); + return; + } + this.mlContext?.writeTensor(this.tensorEntry[0], data); + } + + public async download(dstBuffer?: ArrayBufferView | ArrayBuffer): Promise { + if (this.activeUpload) { + if (dstBuffer) { + if (dstBuffer instanceof ArrayBuffer) { + new Uint8Array(dstBuffer).set(this.activeUpload); + } else { + new Uint8Array(dstBuffer.buffer, dstBuffer.byteOffset, dstBuffer.byteLength).set(this.activeUpload); + } + + return; + } else { + return this.activeUpload.buffer; + } + } + if (!this.tensorEntry) { + throw new Error('Tensor has not been created.'); + } + if (dstBuffer) { + return this.context.readTensor(this.tensorEntry[0], dstBuffer); + } + return this.context.readTensor(this.tensorEntry[0]); + } +} + +class TensorManagerImpl implements TensorManager { + private tensorsById = new Map(); + private tensorIdsByContext = new Map>(); + + constructor(private backend: WebNNBackend) {} + + public reserveTensorId(): TensorId { + const tensorId = createNewTensorId(); + this.tensorsById.set(tensorId, new TensorTracker()); + return tensorId; + } + + public releaseTensorId(tensorId: TensorId): void { + const tensorTracker = this.tensorsById.get(tensorId); + if (!tensorTracker) { + return; + } + tensorTracker.destroy(); + this.tensorsById.delete(tensorId); + for (const [mlContext, tensors] of this.tensorIdsByContext) { + if (tensors.has(tensorId)) { + tensors.delete(tensorId); + if (tensors.size === 0) { + this.tensorIdsByContext.delete(mlContext); + } + break; + } + } + } + + public async ensureTensor( + tensorId: TensorId, + dataType: MLOperandDataType, + shape: number[], + copyOld: boolean, + ): Promise { + LOG_DEBUG( + 'verbose', + () => + `[WebNN] TensorManager.ensureTensor {tensorId: ${tensorId}, dataType: ${ + dataType + }, shape: ${shape}, copyOld: ${copyOld}}`, + ); + const tensor = this.tensorsById.get(tensorId); + if (!tensor) { + throw new Error('Tensor not found.'); + } + tensor.context = this.backend.currentContext; + if (!this.tensorIdsByContext.has(this.backend.currentContext)) { + this.tensorIdsByContext.set(this.backend.currentContext, new Set()); + } + this.tensorIdsByContext.get(this.backend.currentContext)?.add(tensorId); + return tensor.ensureTensor(dataType, shape, copyOld); + } + + public upload(tensorId: TensorId, data: Uint8Array): void { + this.tensorsById.get(tensorId)!.upload(data); + } + + public async download(tensorId: TensorId): Promise; + public async download(tensorId: TensorId, dstBuffer: ArrayBufferView | ArrayBuffer): Promise; + async download(tensorId: TensorId, dstBuffer?: ArrayBufferView | ArrayBuffer): Promise { + LOG_DEBUG( + 'verbose', + () => `[WebNN] TensorManager.download {tensorId: ${tensorId}, dstBuffer: ${dstBuffer?.byteLength}}`, + ); + return this.tensorsById.get(tensorId)!.download(dstBuffer); + } + + public releaseTensorsForContext(mlContext: MLContext): void { + const tensors = this.tensorIdsByContext.get(mlContext); + if (!tensors) { + return; + } + for (const tensorId of tensors) { + this.tensorsById.get(tensorId)!.destroy(); + this.tensorsById.delete(tensorId); + } + this.tensorIdsByContext.delete(mlContext); + } + + public registerTensor( + mlContext: 
MLContext, + mlTensor: MLTensor, + dataType: MLOperandDataType, + shape: readonly number[], + ): TensorId { + for (const [tensorId, tensorTracker] of this.tensorsById) { + if (tensorTracker.trySelectTensor(mlContext, mlTensor)) { + return tensorId; + } + } + const tensorId = createNewTensorId(); + this.tensorsById.set(tensorId, new TensorTracker(mlContext, [mlTensor, dataType, shape])); + let tensors = this.tensorIdsByContext.get(mlContext); + if (!tensors) { + tensors = new Set(); + this.tensorIdsByContext.set(mlContext, tensors); + } + tensors.add(tensorId); + return tensorId; + } +} + +export const createTensorManager = (...args: ConstructorParameters): TensorManager => + new TensorManagerImpl(...args); diff --git a/js/web/lib/wasm/jsep/webnn/webnn.d.ts b/js/web/lib/wasm/jsep/webnn/webnn.d.ts index f8a1e1966fd4c..5cb0f4e74c3df 100644 --- a/js/web/lib/wasm/jsep/webnn/webnn.d.ts +++ b/js/web/lib/wasm/jsep/webnn/webnn.d.ts @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +/* eslint-disable @typescript-eslint/naming-convention */ + interface NavigatorML { readonly ml: ML; } @@ -30,7 +32,9 @@ type MLInputOperandLayout = 'nchw'|'nhwc'; type MLOperandDataType = 'float32'|'float16'|'int32'|'uint32'|'int64'|'uint64'|'int8'|'uint8'; interface MLOperandDescriptor { dataType: MLOperandDataType; - dimensions?: number[]; + shape?: readonly number[]; + /** @deprecated Use shape instead of dimensions */ + dimensions?: readonly number[]; } interface MLOperand { dataType(): MLOperandDataType; @@ -379,23 +383,32 @@ interface MLGraphBuilder { where(condition: MLOperand, input: MLOperand, other: MLOperand): MLOperand; } -// Experimental MLBuffer interface +// Experimental MLTensor interface -type MLSize64Out = number; -interface MLBuffer { - readonly size: MLSize64Out; +interface MLTensor { destroy(): void; } -type MLSize64 = number; -interface MLBufferDescriptor { - size: MLSize64; + +type MLNamedTensor = Record; + +type MLTensorUsageFlags = number; + +declare const MLTensorUsage: { + readonly WEBGPU_INTEROP: MLTensorUsageFlags; + readonly READ: MLTensorUsageFlags; + readonly WRITE: MLTensorUsageFlags; +}; + +interface MLTensorDescriptor extends MLOperandDescriptor { + usage: MLTensorUsageFlags; } -type MLNamedBuffers = Record; + interface MLContext { - createBuffer(descriptor: MLBufferDescriptor): MLBuffer; - writeBuffer( - dstBuffer: MLBuffer, srcData: ArrayBufferView|ArrayBuffer, srcElementOffset?: MLSize64, - srcElementSize?: MLSize64): void; - readBuffer(srcBuffer: MLBuffer): Promise; - dispatch(graph: MLGraph, inputs: MLNamedBuffers, outputs: MLNamedBuffers): void; + createTensor(descriptor: MLTensorDescriptor): Promise; + writeTensor( + destinationTensor: MLTensor, sourceData: ArrayBufferView|ArrayBuffer, sourceElementOffset?: number, + sourceElementSize?: number): void; + readTensor(sourceTensor: MLTensor): Promise; + readTensor(sourceTensor: MLTensor, destinationData: ArrayBufferView|ArrayBuffer): Promise; + dispatch(graph: MLGraph, inputs: MLNamedTensor, outputs: MLNamedTensor): void; } diff --git a/js/web/lib/wasm/proxy-messages.ts b/js/web/lib/wasm/proxy-messages.ts index 8f3acdd582445..559f319a10f66 100644 --- a/js/web/lib/wasm/proxy-messages.ts +++ b/js/web/lib/wasm/proxy-messages.ts @@ -19,11 +19,18 @@ export type GpuBufferMetadata = { dispose?: () => void; }; +export type MLTensorMetadata = { + mlTensor: Tensor.MLTensorType; + download?: () => Promise; + dispose?: () => void; +}; + /** - * Tensors on location 'cpu-pinned' and 
'gpu-buffer' are not serializable. + * Tensors on location 'cpu-pinned', 'gpu-buffer', and 'ml-tensor' are not serializable. */ export type UnserializableTensorMetadata = | [dataType: Tensor.Type, dims: readonly number[], data: GpuBufferMetadata, location: 'gpu-buffer'] + | [dataType: Tensor.Type, dims: readonly number[], data: MLTensorMetadata, location: 'ml-tensor'] | [dataType: Tensor.Type, dims: readonly number[], data: Tensor.DataType, location: 'cpu-pinned']; /** @@ -34,6 +41,7 @@ export type UnserializableTensorMetadata = * - cpu: Uint8Array * - cpu-pinned: Uint8Array * - gpu-buffer: GpuBufferMetadata + * - ml-tensor: MLTensorMetadata * - location: tensor data location */ export type TensorMetadata = SerializableTensorMetadata | UnserializableTensorMetadata; diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts index eff3e91389c98..c19043cc3637f 100644 --- a/js/web/lib/wasm/session-handler-inference.ts +++ b/js/web/lib/wasm/session-handler-inference.ts @@ -12,7 +12,7 @@ import { import { SerializableInternalBuffer, TensorMetadata } from './proxy-messages'; import { copyFromExternalBuffer, createSession, endProfiling, releaseSession, run } from './proxy-wrapper'; -import { isGpuBufferSupportedType } from './wasm-common'; +import { isGpuBufferSupportedType, isMLTensorSupportedType } from './wasm-common'; import { isNode } from './wasm-utils-env'; import { loadFile } from './wasm-utils-load-file'; @@ -22,6 +22,8 @@ export const encodeTensorMetadata = (tensor: Tensor, getName: () => string): Ten return [tensor.type, tensor.dims, tensor.data, 'cpu']; case 'gpu-buffer': return [tensor.type, tensor.dims, { gpuBuffer: tensor.gpuBuffer }, 'gpu-buffer']; + case 'ml-tensor': + return [tensor.type, tensor.dims, { mlTensor: tensor.mlTensor }, 'ml-tensor']; default: throw new Error(`invalid data location: ${tensor.location} for ${getName()}`); } @@ -39,6 +41,14 @@ export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => { const { gpuBuffer, download, dispose } = tensor[2]; return Tensor.fromGpuBuffer(gpuBuffer, { dataType, dims: tensor[1], download, dispose }); } + case 'ml-tensor': { + const dataType = tensor[0]; + if (!isMLTensorSupportedType(dataType)) { + throw new Error(`not supported data type: ${dataType} for deserializing MLTensor tensor`); + } + const { mlTensor, download, dispose } = tensor[2]; + return Tensor.fromMLTensor(mlTensor, { dataType, dims: tensor[1], download, dispose }); + } default: throw new Error(`invalid data location: ${tensor[3]}`); } diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts index 78ff14540d8cb..ad2ff62587252 100644 --- a/js/web/lib/wasm/wasm-common.ts +++ b/js/web/lib/wasm/wasm-common.ts @@ -240,6 +240,20 @@ export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuB type === 'uint4' || type === 'int4'; +/** + * Check whether the given tensor type is supported by WebNN MLTensor + */ +export const isMLTensorSupportedType = (type: Tensor.Type): type is Tensor.MLTensorDataTypes => + type === 'float32' || + type === 'float16' || + type === 'int32' || + type === 'int64' || + type === 'uint32' || + type === 'uint64' || + type === 'int8' || + type === 'uint8' || + type === 'bool'; + /** * Map string data location to integer value */ @@ -255,6 +269,8 @@ export const dataLocationStringToEnum = (location: Tensor.DataLocation): number return 3; case 'gpu-buffer': return 4; + case 'ml-tensor': + return 5; default: throw new Error(`unsupported data location: 
${location}`); } @@ -264,4 +280,4 @@ export const dataLocationStringToEnum = (location: Tensor.DataLocation): number * Map integer data location to string value */ export const dataLocationEnumToString = (location: number): Tensor.DataLocation | undefined => - (['none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer'] as const)[location]; + (['none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer', 'ml-tensor'] as const)[location]; diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts index ed001cfa90f59..0668ac1931988 100644 --- a/js/web/lib/wasm/wasm-core-impl.ts +++ b/js/web/lib/wasm/wasm-core-impl.ts @@ -20,6 +20,7 @@ import { calculateTensorSizeInBytes, dataLocationStringToEnum, isGpuBufferSupportedType, + isMLTensorSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, @@ -162,7 +163,7 @@ export const initEp = async (env: Env, epName: string): Promise => { /** * valid data locations for input/output tensors. */ -type SupportedTensorDataLocationForInputOutput = 'cpu' | 'cpu-pinned' | 'gpu-buffer'; +type SupportedTensorDataLocationForInputOutput = 'cpu' | 'cpu-pinned' | 'gpu-buffer' | 'ml-tensor'; type IOBindingState = { /** @@ -173,7 +174,7 @@ type IOBindingState = { /** * the preferred location for each output tensor. * - * value is one of 'cpu', 'cpu-pinned', 'gpu-buffer'. + * value is one of 'cpu', 'cpu-pinned', 'gpu-buffer', 'ml-tensor'. */ readonly outputPreferredLocations: readonly SupportedTensorDataLocationForInputOutput[]; @@ -287,6 +288,7 @@ export const createSession = async ( for (const provider of options?.executionProviders ?? []) { const providerName = typeof provider === 'string' ? provider : provider.name; if (providerName === 'webnn') { + wasm.shouldTransferToMLTensor = false; if (wasm.currentContext) { throw new Error('WebNN execution provider is already set.'); } @@ -318,7 +320,9 @@ export const createSession = async ( // clear current MLContext after session creation if (wasm.currentContext) { + wasm.jsepRegisterMLContext!(sessionHandle, wasm.currentContext); wasm.currentContext = undefined; + wasm.shouldTransferToMLTensor = true; } const [inputCount, outputCount] = getSessionInputOutputCount(sessionHandle); @@ -354,7 +358,7 @@ export const createSession = async ( typeof options?.preferredOutputLocation === 'string' ? options.preferredOutputLocation : (options?.preferredOutputLocation?.[nameString] ?? 'cpu'); - if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer') { + if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer' && location !== 'ml-tensor') { throw new Error(`Not supported preferred output location: ${location}.`); } if (enableGraphCapture && location !== 'gpu-buffer') { @@ -366,9 +370,9 @@ export const createSession = async ( } } - // use IO binding only when at least one output is preffered to be on GPU. + // use IO binding only when at least one output is preferred to be on GPU. 
let bindingState: IOBindingState | null = null; - if (!BUILD_DEFS.DISABLE_JSEP && outputPreferredLocations.some((l) => l === 'gpu-buffer')) { + if (!BUILD_DEFS.DISABLE_JSEP && outputPreferredLocations.some((l) => l === 'gpu-buffer' || l === 'ml-tensor')) { ioBindingHandle = wasm._OrtCreateBinding(sessionHandle); if (ioBindingHandle === 0) { checkLastError("Can't create IO binding."); @@ -459,7 +463,7 @@ export const prepareInputOutputTensor = ( let rawData: number; let dataByteLength: number; - if (dataType === 'string' && location === 'gpu-buffer') { + if (dataType === 'string' && (location === 'gpu-buffer' || location === 'ml-tensor')) { throw new Error('String tensor is not supported on GPU.'); } @@ -478,6 +482,15 @@ export const prepareInputOutputTensor = ( throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.'); } rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength); + } else if (location === 'ml-tensor') { + const mlTensor = tensor[2].mlTensor as MLTensor; + dataByteLength = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(dataType), dims)!; + + const registerMLTensor = wasm.jsepRegisterMLTensor; + if (!registerMLTensor) { + throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.'); + } + rawData = registerMLTensor(mlTensor, tensorDataTypeStringToEnum(dataType), dims); } else { const data = tensor[2]; @@ -563,6 +576,9 @@ export const run = async ( const outputNamesOffset = wasm.stackAlloc(outputCount * 4); try { + // WebNN backend needs the active session to check MLTensors with the current context. + wasm.jsepOnRunStart?.(sessionHandle); + [runOptionsHandle, runOptionsAllocs] = setRunOptions(options); // create input tensors @@ -654,7 +670,6 @@ export const run = async ( ]); } - wasm.jsepOnRunStart?.(sessionHandle); let errorCode: number; if (!BUILD_DEFS.DISABLE_JSEP && ioBindingState) { errorCode = await wasm._OrtRunWithBinding( @@ -726,7 +741,7 @@ export const run = async ( const preferredLocation = ioBindingState?.outputPreferredLocations[outputIndices[i]]; if (type === 'string') { - if (preferredLocation === 'gpu-buffer') { + if (preferredLocation === 'gpu-buffer' || preferredLocation === 'ml-tensor') { throw new Error('String tensor is not supported on GPU.'); } const stringData: string[] = []; @@ -766,6 +781,37 @@ export const run = async ( }, 'gpu-buffer', ]); + } else if (preferredLocation === 'ml-tensor' && size > 0) { + const ensureTensor = wasm.jsepEnsureTensor; + if (!ensureTensor) { + throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.'); + } + const tensorSize = calculateTensorSizeInBytes(dataType, size); + if (tensorSize === undefined || !isMLTensorSupportedType(type)) { + throw new Error(`Unsupported data type: ${type}`); + } + + // If the graph has been partitioned, the output tensor may have not been created. For this reason, we use + // ensureTensor to get/create the MLTensor. In which case, we don't need to copy the data if a new tensor + // has been created. + const mlTensor = await ensureTensor(dataOffset, dataType, dims, false); + + // do not release the tensor right now. it will be released when user calls tensor.dispose(). 
+ keepOutputTensor = true; + + output.push([ + type, + dims, + { + mlTensor, + download: wasm.jsepCreateMLTensorDownloader!(dataOffset, type), + dispose: () => { + wasm.jsepReleaseTensorId!(dataOffset); + wasm._OrtReleaseTensor(tensor); + }, + }, + 'ml-tensor', + ]); } else { const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type); const data = new typedArrayConstructor(size); diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts index 828cd3cfd94fa..3e08fe97f559d 100644 --- a/js/web/lib/wasm/wasm-types.ts +++ b/js/web/lib/wasm/wasm-types.ts @@ -7,6 +7,7 @@ /// import type { Tensor } from 'onnxruntime-common'; +import { DataType } from './wasm-common'; /* eslint-disable @typescript-eslint/naming-convention */ @@ -27,6 +28,16 @@ export declare namespace JSEP { type CaptureBeginFunction = () => void; type CaptureEndFunction = () => void; type ReplayFunction = () => void; + type ReserveTensorIdFunction = () => number; + type ReleaseTensorIdFunction = (tensorId: number) => void; + type EnsureTensorFunction = ( + tensorId: number, + dataType: DataType, + shape: readonly number[], + copyOld: boolean, + ) => Promise; + type UploadTensorFunction = (tensorId: number, data: Uint8Array) => void; + type DownloadTensorFunction = (tensorId: number, dstBuffer: ArrayBufferView | ArrayBuffer) => Promise; export interface Module extends WebGpuModule, WebNnModule { /** @@ -62,7 +73,17 @@ export declare namespace JSEP { replay: ReplayFunction, ], ): void; - jsepInit(name: 'webnn', initParams?: never): void; + jsepInit( + name: 'webnn', + initParams: [ + backend: BackendType, + reserveTensorId: ReserveTensorIdFunction, + releaseTensorId: ReleaseTensorIdFunction, + ensureTensor: EnsureTensorFunction, + uploadTensor: UploadTensorFunction, + downloadTensor: DownloadTensorFunction, + ], + ): void; } export interface WebGpuModule { @@ -134,6 +155,70 @@ export declare namespace JSEP { * Active MLContext used to create WebNN EP. */ currentContext: MLContext; + + /** + * Disables creating MLTensors. This is used to avoid creating MLTensors for graph initializers. + */ + shouldTransferToMLTensor: boolean; + + /** + * [exported from pre-jsep.js] Register MLContext for a session. + * @param sessionId - specify the session ID. + * @param context - specify the MLContext. + * @returns + */ + jsepRegisterMLContext: (sessionId: number, context: MLContext) => void; + /** + * [exported from pre-jsep.js] Reserve a MLTensor ID attached to the current session. + * @returns the MLTensor ID. + */ + jsepReserveTensorId: () => number; + /** + * [exported from pre-jsep.js] Release an MLTensor ID from use and destroys underlying MLTensor if no longer in use. + * @param tensorId - specify the MLTensor ID. + * @returns + */ + jsepReleaseTensorId: (tensorId: number) => void; + /** + * [exported from pre-jsep.js] Ensure that an MLTensor of a given type and shape exists for a MLTensor ID. + * @param tensorId - specify the MLTensor ID. + * @param onnxDataType - specify the data type. + * @param shape - specify the dimensions (WebNN shape) of the tensor. + * @param copyOld - specify whether to copy the old tensor if a new tensor was created. + * @returns the MLTensor associated with the tensor ID. + */ + jsepEnsureTensor: (tensorId: number, dataType: DataType, shape: number[], copyOld: boolean) => Promise; + /** + * [exported from pre-jsep.js] Upload data to an MLTensor. + * @param tensorId - specify the MLTensor ID. + * @param data - specify the data to upload. 
It can be a TensorProto::data_type or a WebNN MLOperandDataType. + * @returns + */ + jsepUploadTensor: (tensorId: number, data: Uint8Array) => void; + /** + * [exported from pre-jsep.js] Download data from an MLTensor. + * @param tensorId - specify the MLTensor ID. + * @returns the downloaded data. + */ + jsepDownloadTensor: (tensorId: number, dstBuffer: ArrayBufferView | ArrayBuffer) => Promise; + /** + * [exported from pre-jsep.js] Creates a downloader function to download data from an MLTensor. + * @param tensorId - specify the MLTensor ID. + * @param type - specify the data type. + * @returns the downloader function. + */ + jsepCreateMLTensorDownloader: ( + tensorId: number, + type: Tensor.MLTensorDataTypes, + ) => () => Promise; + /** + * [exported from pre-jsep.js] Registers an external MLTensor to a session. + * @param tensor - specify the MLTensor. + * @param dataType - specify the data type. + * @param dimensions - specify the dimensions. + * @returns the MLTensor ID for the external MLTensor. + */ + jsepRegisterMLTensor: (tensor: MLTensor, onnxDataType: DataType, dimensions: readonly number[]) => number; } } diff --git a/js/web/script/test-runner-cli-args.ts b/js/web/script/test-runner-cli-args.ts index d237293dbb192..e94e11d0ace56 100644 --- a/js/web/script/test-runner-cli-args.ts +++ b/js/web/script/test-runner-cli-args.ts @@ -62,6 +62,8 @@ Options: none (default) gpu-tensor use pre-allocated GPU tensors for inputs and outputs gpu-location use pre-allocated GPU tensors for inputs and set preferredOutputLocation to 'gpu-buffer' + ml-tensor use pre-allocated MLTensor tensors for inputs and outputs + ml-location use pre-allocated MLTensor tensors for inputs and set preferredOutputLocation to 'ml-tensor' *** Logging Options *** @@ -133,7 +135,7 @@ export declare namespace TestRunnerCliArgs { type Backend = 'cpu' | 'webgl' | 'webgpu' | 'wasm' | 'onnxruntime' | 'webnn'; type Environment = 'chrome' | 'chromecanary' | 'edge' | 'firefox' | 'electron' | 'safari' | 'node' | 'bs'; type BundleMode = 'dev' | 'perf'; - type IOBindingMode = 'none' | 'gpu-tensor' | 'gpu-location'; + type IOBindingMode = 'none' | 'gpu-tensor' | 'gpu-location' | 'ml-tensor' | 'ml-location'; } export interface TestRunnerCliArgs { @@ -455,7 +457,7 @@ export function parseTestRunnerCliArgs(cmdlineArgs: string[]): TestRunnerCliArgs // Option: -i=<...>, --io-binding=<...> const ioBindingArg = args['io-binding'] || args.i; const ioBindingMode = typeof ioBindingArg !== 'string' ? 'none' : ioBindingArg; - if (['none', 'gpu-tensor', 'gpu-location'].indexOf(ioBindingMode) === -1) { + if (['none', 'gpu-tensor', 'gpu-location', 'ml-tensor', 'ml-location'].indexOf(ioBindingMode) === -1) { throw new Error(`not supported io binding mode ${ioBindingMode}`); } diff --git a/js/web/script/test-runner-cli.ts b/js/web/script/test-runner-cli.ts index a9fcd7b876b2f..68ee58dab7094 100644 --- a/js/web/script/test-runner-cli.ts +++ b/js/web/script/test-runner-cli.ts @@ -380,7 +380,7 @@ async function main() { } let ioBinding: Test.IOBindingMode; - if (backend !== 'webgpu' && args.ioBindingMode !== 'none') { + if (!['webgpu', 'webnn'].includes(backend) && args.ioBindingMode !== 'none') { npmlog.warn( 'TestRunnerCli.Init.Model', `Ignoring IO Binding Mode "${args.ioBindingMode}" for backend "${backend}".`, diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts index aa9555c191501..2176a776a0192 100644 --- a/js/web/test/test-runner.ts +++ b/js/web/test/test-runner.ts @@ -1,6 +1,11 @@ // Copyright (c) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. +// WebNN API currently does not have a TypeScript definition file. This file is a workaround with types generated from +// WebNN API specification. +// https://github.com/webmachinelearning/webnn/issues/677 +/// + import { Float16Array as Float16ArrayPolyfill } from '@petamoriken/float16'; import { expect } from 'chai'; import * as ort from 'onnxruntime-common'; @@ -19,6 +24,7 @@ import { createView } from '../lib/wasm/jsep/tensor-view'; import { calculateTensorSizeInBytes, isGpuBufferSupportedType, + isMLTensorSupportedType, tensorDataTypeStringToEnum, } from '../lib/wasm/wasm-common'; @@ -170,13 +176,20 @@ async function initializeSession( }`, ); + let preferredOutputLocation: ort.Tensor.DataLocation | undefined; + if (ioBindingMode === 'gpu-location') { + preferredOutputLocation = 'gpu-buffer'; + } else if (ioBindingMode === 'ml-location') { + preferredOutputLocation = 'ml-tensor'; + } + const profilerConfig = profile ? { maxNumberEvents: 65536 } : undefined; const sessionConfig = { ...sessionOptions, executionProviders: [backendHint], profiler: profilerConfig, enableProfiling: profile, - preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined, + preferredOutputLocation, externalData, }; @@ -219,6 +232,7 @@ export class ModelTestContext { readonly perfData: ModelTestContext.ModelTestPerfData, readonly ioBinding: Test.IOBindingMode, private readonly profile: boolean, + public readonly mlContext?: MLContext, ) {} /** @@ -272,7 +286,24 @@ export class ModelTestContext { const initStart = now(); const executionProviderConfig = - modelTest.backend === 'webnn' ? testOptions?.webnnOptions || 'webnn' : modelTest.backend!; + modelTest.backend === 'webnn' ? testOptions?.webnnOptions || { name: 'webnn' } : modelTest.backend!; + let mlContext: MLContext | undefined; + if (['ml-tensor', 'ml-location'].includes(modelTest.ioBinding)) { + const webnnOptions = executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption; + const deviceType = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.deviceType; + const numThreads = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.numThreads; + const powerPreference = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.powerPreference; + + mlContext = await navigator.ml.createContext({ + deviceType, + numThreads, + powerPreference, + }); + (executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption).context = mlContext; + if (!deviceType) { + (executionProviderConfig as ort.InferenceSession.WebNNContextOptions).deviceType = deviceType; + } + } const session = await initializeSession( modelTest.modelUrl, executionProviderConfig, @@ -295,6 +326,7 @@ export class ModelTestContext { { init: initEnd - initStart, firstRun: -1, runs: [], count: 0 }, modelTest.ioBinding, profile, + mlContext, ); } finally { this.initializing = false; @@ -622,30 +654,82 @@ function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[] }); } +async function createMLTensorForOutput(mlContext: MLContext, type: ort.Tensor.Type, dims: readonly number[]) { + if (!isMLTensorSupportedType(type)) { + throw new Error(`createMLTensorForOutput can not work with ${type} tensor`); + } + + const dataType = type === 'bool' ? 'uint8' : type; + + const mlTensor = await mlContext.createTensor({ + dataType, + shape: dims as number[], + // Assign both shape and dimensions while transitioning to new API. 
+ dimensions: dims as number[], + usage: MLTensorUsage.READ, + }); + + return ort.Tensor.fromMLTensor(mlTensor, { + dataType: type, + dims, + dispose: () => mlTensor.destroy(), + download: async () => { + const arrayBuffer = await mlContext.readTensor(mlTensor); + return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.MLTensorDataTypes]; + }, + }); +} + +async function createMLTensorForInput(mlContext: MLContext, cpuTensor: ort.Tensor): Promise { + if (!isMLTensorSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) { + throw new Error(`createMLTensorForInput can not work with ${cpuTensor.type} tensor`); + } + const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type; + const mlTensor = await mlContext.createTensor({ + dataType, + shape: cpuTensor.dims as number[], + // Assign both shape and dimensions while transitioning to new API. + dimensions: cpuTensor.dims as number[], + usage: MLTensorUsage.WRITE, + }); + mlContext.writeTensor(mlTensor, cpuTensor.data); + return ort.Tensor.fromMLTensor(mlTensor, { + dataType: cpuTensor.type, + dims: cpuTensor.dims, + dispose: () => mlTensor.destroy(), + }); +} + export async function sessionRun(options: { session: ort.InferenceSession; feeds: Record; outputsMetaInfo: Record>; ioBinding: Test.IOBindingMode; + mlContext?: MLContext; }): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> { const session = options.session; const feeds = options.feeds; const fetches: Record = {}; - // currently we only support IO Binding for WebGPU + // currently we only support IO Binding for WebGPU and WebNN // - // For inputs, we create GPU tensors on both 'gpu-tensor' and 'gpu-location' binding testing mode. - // For outputs, we create GPU tensors on 'gpu-tensor' binding testing mode only. + // For inputs, we create tensors on 'gpu-tensor', 'gpu-location', 'ml-tensor', and 'ml-location' binding testing + // modes. + // For outputs, we create tensors on 'gpu-tensor' and 'ml-tensor' binding testing modes. // in 'gpu-device' binding mode, outputs are not pre-allocated. 
- const shouldUploadInput = options.ioBinding === 'gpu-tensor' || options.ioBinding === 'gpu-location'; - const shouldUploadOutput = options.ioBinding === 'gpu-tensor'; + const shouldUploadInput = ['gpu-tensor', 'gpu-location', 'ml-location', 'ml-tensor'].includes(options.ioBinding); + const shouldUploadOutput = options.ioBinding === 'gpu-tensor' || options.ioBinding === 'ml-tensor'; try { if (shouldUploadInput) { // replace the CPU tensors in feeds into GPU tensors for (const name in feeds) { if (Object.hasOwnProperty.call(feeds, name)) { if (feeds[name].size > 0) { - feeds[name] = createGpuTensorForInput(feeds[name]); + if (options.ioBinding === 'ml-location' || options.ioBinding === 'ml-tensor') { + feeds[name] = await createMLTensorForInput(options.mlContext!, feeds[name]); + } else { + feeds[name] = createGpuTensorForInput(feeds[name]); + } } } } @@ -658,7 +742,11 @@ export async function sessionRun(options: { if (dims.some((d) => d === 0)) { fetches[name] = new ort.Tensor(type, [], dims); } else { - fetches[name] = createGpuTensorForOutput(type, dims); + if (options.ioBinding === 'ml-tensor') { + fetches[name] = await createMLTensorForOutput(options.mlContext!, type, dims); + } else { + fetches[name] = createGpuTensorForOutput(type, dims); + } } } } @@ -714,6 +802,7 @@ export async function runModelTestSet( feeds, outputsMetaInfo, ioBinding: context.ioBinding, + mlContext: context.mlContext, }); if (context.perfData.count === 0) { context.perfData.firstRun = end - start; diff --git a/js/web/test/test-types.ts b/js/web/test/test-types.ts index be1e56485ec5a..29a11f969ffea 100644 --- a/js/web/test/test-types.ts +++ b/js/web/test/test-types.ts @@ -52,8 +52,12 @@ export declare namespace Test { * `preferredOutputLocation` will be set to `gpu-buffer`. * - gpu-tensor: inputs and outputs will all be pre-allocated as GPU tensors. `preferredOutputLocation` * will not be set. + * - ml-location: inputs will be pre-allocated as ML tensors; no output will be pre-allocated; + * `preferredOutputLocation` will be set to `ml-tensor`. + * - ml-tensor: inputs and outputs will all be pre-allocated as MLTensor tensors. `preferredOutputLocation` + * will not be set. */ - export type IOBindingMode = 'none' | 'gpu-tensor' | 'gpu-location'; + export type IOBindingMode = 'none' | 'gpu-tensor' | 'gpu-location' | 'ml-tensor' | 'ml-location'; export interface ModelTestCase { name: string; diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc index 5e66f2b99fded..b6dc8ad56f257 100644 --- a/onnxruntime/core/framework/allocator.cc +++ b/onnxruntime/core/framework/allocator.cc @@ -141,7 +141,8 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA strcmp(name1, onnxruntime::OpenVINO_GPU) == 0 || strcmp(name1, onnxruntime::DML) == 0 || strcmp(name1, onnxruntime::HIP) == 0 || - strcmp(name1, onnxruntime::WEBGPU_BUFFER) == 0) { + strcmp(name1, onnxruntime::WEBGPU_BUFFER) == 0 || + strcmp(name1, onnxruntime::WEBNN_TENSOR) == 0) { *out = new OrtMemoryInfo( name1, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, static_cast(id1)), id1, mem_type1); diff --git a/onnxruntime/core/providers/webnn/allocator.cc b/onnxruntime/core/providers/webnn/allocator.cc new file mode 100644 index 0000000000000..9c5cd651e1f00 --- /dev/null +++ b/onnxruntime/core/providers/webnn/allocator.cc @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+#include "core/providers/webnn/allocator.h"
+
+#include "core/common/safeint.h"
+
+namespace onnxruntime {
+namespace webnn {
+
+void* WebNNTensorAllocator::Alloc(size_t size) {
+  if (size == 0) {
+    return nullptr;
+  }
+  if (!emscripten::val::module_property("shouldTransferToMLTensor").as<bool>()) {
+    // We don't need to transfer the tensor to an MLTensor, so we don't need to allocate an MLTensor id.
+    return nullptr;
+  }
+  void* p = EM_ASM_PTR({ return Module.jsepReserveTensorId(); });
+  allocations_[p] = size;
+  stats_.num_allocs++;
+  stats_.bytes_in_use += SafeInt<size_t>(size);
+  return p;
+}
+
+void WebNNTensorAllocator::Free(void* p) {
+  if (p == nullptr) {
+    return;
+  }
+  EM_ASM({ Module.jsepReleaseTensorId($0); }, p);
+  size_t size = allocations_[p];
+  stats_.bytes_in_use -= size;
+  allocations_.erase(p);
+}
+
+void WebNNTensorAllocator::GetStats(AllocatorStats* stats) {
+  *stats = stats_;
+}
+
+}  // namespace webnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/allocator.h b/onnxruntime/core/providers/webnn/allocator.h
new file mode 100644
index 0000000000000..c06da909801cc
--- /dev/null
+++ b/onnxruntime/core/providers/webnn/allocator.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <emscripten.h>
+#include <emscripten/val.h>
+
+#include "core/common/inlined_containers.h"
+#include "core/framework/allocator.h"
+#include "core/framework/ortdevice.h"
+
+namespace onnxruntime {
+namespace webnn {
+
+class WebNNTensorAllocator : public IAllocator {
+ public:
+  WebNNTensorAllocator() : IAllocator(OrtMemoryInfo(WEBNN_TENSOR, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0), 0, OrtMemTypeDefault)) {}
+
+  void* Alloc(size_t size) override;
+
+  void Free(void* p) override;
+
+  void GetStats(AllocatorStats* stats) override;
+
+ private:
+  AllocatorStats stats_;
+  InlinedHashMap<void*, size_t> allocations_;
+};
+
+}  // namespace webnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc
index c4a633fcc92bb..b90c7d76a6507 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.cc
+++ b/onnxruntime/core/providers/webnn/builders/helper.cc
@@ -12,6 +12,19 @@
 namespace onnxruntime {
 namespace webnn {
 
+WebnnDeviceType DeviceTypeFromString(const std::string_view& device_type) {
+  if (device_type == "gpu") {
+    return WebnnDeviceType::GPU;
+  }
+  if (device_type == "cpu") {
+    return WebnnDeviceType::CPU;
+  }
+  if (device_type == "npu") {
+    return WebnnDeviceType::NPU;
+  }
+  ORT_THROW("Unknown WebNN deviceType.");
+}
+
 InitializedTensorSet CollectAllInitializedTensors(const GraphViewer& graph_viewer) {
   InitializedTensorSet all_initializers;
   if (graph_viewer.IsSubgraph()) {
@@ -243,5 +256,10 @@ bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
   }
 }
 
+bool IsMLTensorSupported() {
+  static bool is_supported = !emscripten::val::global("MLTensor").isUndefined();
+  return is_supported;
+}
+
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h
index 7ba1d18fa1a76..529463f0808ad 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.h
+++ b/onnxruntime/core/providers/webnn/builders/helper.h
@@ -31,6 +31,8 @@ enum class WebnnDeviceType {
   NPU,
 };
 
+WebnnDeviceType DeviceTypeFromString(const std::string_view& device_type);
+
 // Collects all the initializer tensors in the
subGraph and its ancestor graphs. InitializedTensorSet CollectAllInitializedTensors(const GraphViewer& graph_viewer); @@ -292,5 +294,7 @@ bool GetBidirectionalBroadcastShape(std::vector& shape_a, bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type); +bool IsMLTensorSupported(); + } // namespace webnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/model.cc b/onnxruntime/core/providers/webnn/builders/model.cc index 8cd2e8d0ffad3..fcfdb146bff34 100644 --- a/onnxruntime/core/providers/webnn/builders/model.cc +++ b/onnxruntime/core/providers/webnn/builders/model.cc @@ -11,21 +11,30 @@ #include "core/common/safeint.h" #include "core/graph/onnx_protobuf.h" #include "core/providers/common.h" -#include "core/providers/webnn/builders/helper.h" #include "model.h" namespace onnxruntime { namespace webnn { -Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger) +Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger, bool use_dispatch) : wnn_context_(context), wnn_graph_(graph), - logger_(logger) {} + logger_(logger), + use_dispatch_(use_dispatch) {} Model::~Model() {} Status Model::Predict(const InlinedHashMap& inputs, const InlinedHashMap& outputs) { + if (use_dispatch_) { + return Dispatch(inputs, outputs); + } else { + return Compute(inputs, outputs); + } +} + +onnxruntime::common::Status Model::Compute(const InlinedHashMap& inputs, + const InlinedHashMap& outputs) { for (const auto& input : inputs) { const std::string& name = input.first; const struct OnnxTensorData tensor = input.second; @@ -142,6 +151,40 @@ Status Model::Predict(const InlinedHashMap& inputs, return Status::OK(); } +onnxruntime::common::Status Model::Dispatch(const InlinedHashMap& inputs, + const InlinedHashMap& outputs) { + auto jsepEnsureTensor = emscripten::val::module_property("jsepEnsureTensor"); + auto promises = emscripten::val::array(); + for (const auto& [_, tensor] : inputs) { + emscripten::val shape = emscripten::val::array(); + for (const auto& dim : tensor.tensor_info.shape) { + uint32_t dim_val = SafeInt(dim); + shape.call("push", dim_val); + } + auto ml_tensor = jsepEnsureTensor(reinterpret_cast(tensor.buffer), tensor.tensor_info.data_type, shape, true); + promises.call("push", ml_tensor); + } + for (const auto& [_, tensor] : outputs) { + emscripten::val shape = emscripten::val::array(); + for (const auto& dim : tensor.tensor_info.shape) { + uint32_t dim_val = SafeInt(dim); + shape.call("push", dim_val); + } + auto ml_tensor = jsepEnsureTensor(reinterpret_cast(tensor.buffer), tensor.tensor_info.data_type, shape, false); + promises.call("push", ml_tensor); + } + auto ml_tensors = emscripten::val::global("Promise").call("all", promises).await(); + for (const auto& [name, _] : inputs) { + wnn_inputs_.set(name, ml_tensors.call("shift")); + } + for (const auto& [name, _] : outputs) { + wnn_outputs_.set(name, ml_tensors.call("shift")); + } + wnn_context_.call("dispatch", wnn_graph_, wnn_inputs_, wnn_outputs_); + + return Status::OK(); +} + const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const { return input_output_info_.at(name); } @@ -156,6 +199,10 @@ void Model::SetOutputMap(InlinedHashMap&& output_map) { // Pre-allocate the input and output buffers for the WebNN graph. void Model::AllocateInputOutputBuffers() { + // We don't need to allocate JS ArrayBuffers if the WebNN API supports MLTensor. 
+ if (use_dispatch_) { + return; + } for (const auto& input : inputs_) { const auto& input_info = input_output_info_.at(input); const auto input_shape = input_info.shape; diff --git a/onnxruntime/core/providers/webnn/builders/model.h b/onnxruntime/core/providers/webnn/builders/model.h index 5119dbbbc9858..c554dcb6f6877 100644 --- a/onnxruntime/core/providers/webnn/builders/model.h +++ b/onnxruntime/core/providers/webnn/builders/model.h @@ -56,6 +56,12 @@ class Model { size_t GetMappedOutputIdx(const std::string& name) const; private: + onnxruntime::common::Status Dispatch(const InlinedHashMap& inputs, + const InlinedHashMap& outputs); + + onnxruntime::common::Status Compute(const InlinedHashMap& inputs, + const InlinedHashMap& outputs); + emscripten::val wnn_context_ = emscripten::val::object(); emscripten::val wnn_graph_ = emscripten::val::object(); const logging::Logger& logger_; @@ -73,7 +79,9 @@ class Model { OrtMutex mutex_; - Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger); + bool use_dispatch_; + + Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger, bool use_dispatch); void SetInputOutputInfo(InlinedHashMap&& input_output_info) { input_output_info_ = std::move(input_output_info); diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index f92fda8c74717..044baa738e8c4 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -340,7 +340,7 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { } // Explicitly release the WebNN builder to free memory. wnn_builder_ = emscripten::val::undefined(); - model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_)); + model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_, IsMLTensorSupported())); model->SetInputs(std::move(input_names_)); model->SetOutputs(std::move(output_names_)); model->SetInputOutputInfo(std::move(input_output_info_)); diff --git a/onnxruntime/core/providers/webnn/data_transfer.cc b/onnxruntime/core/providers/webnn/data_transfer.cc new file mode 100644 index 0000000000000..44e9bf9edf3d9 --- /dev/null +++ b/onnxruntime/core/providers/webnn/data_transfer.cc @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/webnn/data_transfer.h" + +#include +#include "core/framework/tensor.h" + +namespace onnxruntime { +namespace webnn { + +bool DataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const { + // Copying data between MLTensors is not supported by WebNN. + return (dst_device.Type() == OrtDevice::GPU && src_device.Type() == OrtDevice::CPU) || + (dst_device.Type() == OrtDevice::CPU && src_device.Type() == OrtDevice::GPU); +} + +common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const { + if (!emscripten::val::module_property("shouldTransferToMLTensor").as()) { + // We don't need to transfer the tensor to an MLTensor, so we don't need to copy the data. 
+ return Status::OK(); + } + + size_t bytes = src.SizeInBytes(); + if (bytes > 0) { + const void* src_data = src.DataRaw(); + void* dst_data = dst.MutableDataRaw(); + + const auto& dst_device = dst.Location().device; + + if (dst_device.Type() == OrtDevice::GPU) { + EM_ASM({ Module.jsepUploadTensor($0, HEAPU8.subarray($1, $1 + $2)); }, dst_data, reinterpret_cast(src_data), bytes); + } else { + auto jsepDownloadTensor = emscripten::val::module_property("jsepDownloadTensor"); + auto subarray = emscripten::typed_memory_view(bytes, static_cast(dst_data)); + jsepDownloadTensor(reinterpret_cast(src_data), subarray).await(); + } + } + + return Status::OK(); +} + +} // namespace webnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/data_transfer.h b/onnxruntime/core/providers/webnn/data_transfer.h new file mode 100644 index 0000000000000..03cfada46d1a0 --- /dev/null +++ b/onnxruntime/core/providers/webnn/data_transfer.h @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include + +#include "core/framework/data_transfer.h" + +namespace onnxruntime { +namespace webnn { + +class DataTransfer : public IDataTransfer { + public: + bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override; + + common::Status CopyTensor(const Tensor& src, Tensor& dst) const override; +}; + +} // namespace webnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index b729623c5d3d8..2258d1ac1cd8f 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -5,11 +5,14 @@ #include "webnn_execution_provider.h" #include "core/framework/compute_capability.h" +#include "core/framework/data_transfer_manager.h" #include "core/framework/memcpy.h" #include "core/framework/kernel_registry.h" #include "core/graph/graph_viewer.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/common/safeint.h" +#include "core/providers/webnn/allocator.h" +#include "core/providers/webnn/data_transfer.h" #include "builders/model.h" #include "builders/helper.h" @@ -18,20 +21,14 @@ namespace onnxruntime { WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags) - : IExecutionProvider{onnxruntime::kWebNNExecutionProvider} { - // WebNN EP uses NHWC layout for CPU XNNPACK backend and NCHW for GPU DML backend. - if (webnn_device_flags.compare("cpu") == 0) { - wnn_device_type_ = webnn::WebnnDeviceType::CPU; - } else { - if (webnn_device_flags.compare("gpu") == 0) { - wnn_device_type_ = webnn::WebnnDeviceType::GPU; - } else if (webnn_device_flags.compare("npu") == 0) { - wnn_device_type_ = webnn::WebnnDeviceType::NPU; - } else { - ORT_THROW("Unknown WebNN deviceType."); - } - } - + : IExecutionProvider{ + onnxruntime::kWebNNExecutionProvider, + // If MLTensor is supported, we force all the tensors to be allocated as MLTensor. + OrtDevice( + webnn::IsMLTensorSupported() ? 
OrtDevice::GPU : OrtDevice::CPU,
+            OrtDevice::MemType::DEFAULT,
+            0)},
+      wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)) {
   wnn_context_ = emscripten::val::module_property("currentContext");
   if (!wnn_context_.as<bool>()) {
     ORT_THROW("Failed to create WebNN context.");
@@ -322,6 +319,32 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
   return Status::OK();
 }
 
+class WebNNMemcpy : public OpKernel {
+ public:
+  explicit WebNNMemcpy(const OpKernelInfo& info) : OpKernel(info) {}
+
+  Status Compute(OpKernelContext* context) const override {
+    auto jsepEnsureTensor = emscripten::val::module_property("jsepEnsureTensor");
+    const auto* X = context->Input<Tensor>(0);
+    ORT_ENFORCE(X != nullptr, "Memcpy: input tensor is null");
+    auto* Y = context->Output(0, X->Shape());
+    ORT_ENFORCE(Y != nullptr, "Memcpy: output tensor is null");
+    emscripten::val shape = emscripten::val::array();
+    for (auto dim : X->Shape().GetDims()) {
+      shape.call<void>("push", SafeInt<uint32_t>(dim).Ref());
+    }
+
+    jsepEnsureTensor(reinterpret_cast<intptr_t>(Y->MutableDataRaw()),
+                     Y->GetElementType(),
+                     shape, false)
+        .await();
+
+    const auto* data_transfer = Info().GetDataTransferManager().GetDataTransfer(X->Location().device, Y->Location().device);
+
+    return data_transfer->CopyTensor(*X, *Y);
+  }
+};
+
 ONNX_OPERATOR_KERNEL_EX(
     MemcpyFromHost,
     kOnnxDomain,
@@ -330,7 +353,7 @@ ONNX_OPERATOR_KERNEL_EX(
     KernelDefBuilder()
         .InputMemoryType(OrtMemTypeCPUInput, 0)
         .TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()),
-    Memcpy);
+    WebNNMemcpy);
 
 ONNX_OPERATOR_KERNEL_EX(
     MemcpyToHost,
@@ -373,4 +396,22 @@ WebNNExecutionProvider::GetKernelRegistry() const {
   return kernel_registry;
 }
 
+std::unique_ptr<onnxruntime::IDataTransfer> WebNNExecutionProvider::GetDataTransfer() const {
+  if (!webnn::IsMLTensorSupported()) {
+    return nullptr;
+  }
+  return std::make_unique<webnn::DataTransfer>();
+}
+
+std::vector<AllocatorPtr> WebNNExecutionProvider::CreatePreferredAllocators() {
+  if (!webnn::IsMLTensorSupported()) {
+    return {};
+  }
+  AllocatorCreationInfo customAllocatorCreationInfo([&](OrtDevice::DeviceId) {
+    return std::make_unique<webnn::WebNNTensorAllocator>();
+  },
+                                                    0, false);
+  return {CreateAllocator(customAllocatorCreationInfo)};
+}
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.h b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
index 8ea8cedf04300..26c5e476bcc4f 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.h
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
@@ -40,6 +40,8 @@ class WebNNExecutionProvider : public IExecutionProvider {
 #endif
 
   std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;
+  std::unique_ptr<onnxruntime::IDataTransfer> GetDataTransfer() const override;
+  std::vector<AllocatorPtr> CreatePreferredAllocators() override;
 
  private:
   emscripten::val wnn_context_ = emscripten::val::undefined();
diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc
index 0e58bb4f93f7f..5173125cb8634 100644
--- a/onnxruntime/wasm/api.cc
+++ b/onnxruntime/wasm/api.cc
@@ -23,7 +23,8 @@ enum DataLocation {
   DATA_LOCATION_CPU = 1,
   DATA_LOCATION_CPU_PINNED = 2,
   DATA_LOCATION_TEXTURE = 3,
-  DATA_LOCATION_GPU_BUFFER = 4
+  DATA_LOCATION_GPU_BUFFER = 4,
+  DATA_LOCATION_ML_TENSOR = 5
 };
 
 static_assert(sizeof(const char*) == sizeof(size_t), "size of a pointer and a size_t value should be the same.");
@@ -235,7 +236,8 @@ void OrtFree(void* ptr) {
 OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* dims, size_t dims_length,
                           int data_location) {
   if (data_location != DATA_LOCATION_CPU && data_location != DATA_LOCATION_CPU_PINNED &&
-      data_location != DATA_LOCATION_GPU_BUFFER) {
+      data_location != DATA_LOCATION_GPU_BUFFER &&
+      data_location != DATA_LOCATION_ML_TENSOR) {
     std::ostringstream ostr;
     ostr << "Invalid data location: " << data_location;
     CheckStatus(Ort::GetApi().CreateStatus(ORT_INVALID_ARGUMENT, ostr.str().c_str()));
@@ -264,10 +266,15 @@
OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* return UNREGISTER_AUTO_RELEASE(value); } else { OrtMemoryInfo* memory_info = nullptr; - if (data_location != DATA_LOCATION_GPU_BUFFER) { - RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info); - } else { - RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info); + switch (data_location) { + case DATA_LOCATION_GPU_BUFFER: + RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info); + break; + case DATA_LOCATION_ML_TENSOR: + RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info); + break; + default: + RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info); } REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memory_info); @@ -418,15 +425,18 @@ int EMSCRIPTEN_KEEPALIVE OrtBindOutput(OrtIoBinding* io_binding, if (output_location != DATA_LOCATION_NONE && output_location != DATA_LOCATION_CPU && output_location != DATA_LOCATION_CPU_PINNED && - output_location != DATA_LOCATION_GPU_BUFFER) { + output_location != DATA_LOCATION_GPU_BUFFER && + output_location != DATA_LOCATION_ML_TENSOR) { std::ostringstream ostr; ostr << "Invalid data location (" << output_location << ") for output: \"" << name << "\"."; return CheckStatus(Ort::GetApi().CreateStatus(ORT_INVALID_ARGUMENT, ostr.str().c_str())); } OrtMemoryInfo* memory_info = nullptr; - if (output_location != DATA_LOCATION_GPU_BUFFER) { + if (output_location != DATA_LOCATION_GPU_BUFFER && output_location != DATA_LOCATION_ML_TENSOR) { RETURN_ERROR_CODE_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info); + } else if (output_location == DATA_LOCATION_ML_TENSOR) { + RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info); } else { RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info); } diff --git a/onnxruntime/wasm/pre-jsep.js b/onnxruntime/wasm/pre-jsep.js index 70ed295887994..68332d07a9782 100644 --- a/onnxruntime/wasm/pre-jsep.js +++ b/onnxruntime/wasm/pre-jsep.js @@ -202,5 +202,38 @@ Module['jsepInit'] = (name, params) => { Module.jsepUploadExternalBuffer = (dataId, buffer) => { backend['upload'](dataId, buffer); }; + } else if (name === 'webnn') { + // Functions called from EM_ASM need to be assigned in a way that can be minified. + // Functions called via emscripten::val::module_property need to be assigned by name so that the minifier doesn't + // change the name. + + [Module.jsepBackend, + Module.jsepReserveTensorId, + Module.jsepReleaseTensorId, + Module['jsepEnsureTensor'], + Module.jsepUploadTensor, + Module['jsepDownloadTensor'], + ] = params; + + // This function is called from both JS and an EM_ASM block, it needs both a minifiable name and an explicit name. + Module['jsepReleaseTensorId'] = Module.jsepReleaseTensorId; + + // Functions called from JS also need to have explicit names. 
+    const backend = Module.jsepBackend;
+    Module['jsepOnRunStart'] = sessionId => {
+      return backend['onRunStart'](sessionId);
+    };
+    Module['jsepRegisterMLContext'] = (sessionId, mlContext) => {
+      backend['registerMLContext'](sessionId, mlContext);
+    };
+    Module['jsepOnReleaseSession'] = sessionId => {
+      backend['onReleaseSession'](sessionId);
+    };
+    Module['jsepCreateMLTensorDownloader'] = (tensorId, type) => {
+      return backend['createMLTensorDownloader'](tensorId, type);
+    };
+    Module['jsepRegisterMLTensor'] = (tensor, dataType, shape) => {
+      return backend['registerMLTensor'](tensor, dataType, shape);
+    };
   }
 };
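The bindings above assume a WebNN backend object with roughly the surface sketched below. This is an approximation inferred from the calls in this file; the actual implementation is js/web/lib/wasm/jsep/backend-webnn.ts (added by this change), and the exact parameter types may differ.

interface WebNNBackend {
  onRunStart(sessionId: number): void;
  onReleaseSession(sessionId: number): void;
  registerMLContext(sessionId: number, mlContext: MLContext): void;
  // Returns a function that downloads the MLTensor identified by tensorId into CPU memory.
  createMLTensorDownloader(tensorId: number, type: string): () => Promise<ArrayBuffer>;
  // Registers an externally created MLTensor and returns the id used by the WASM side.
  registerMLTensor(tensor: MLTensor, dataType: number, shape: readonly number[]): number;
}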