first operator (correctness validated)
fs-eire committed Sep 13, 2022
1 parent ba09337 commit fe850d1
Showing 11 changed files with 438 additions and 7 deletions.
10 changes: 9 additions & 1 deletion js/web/lib/onnxjs/backends/backend-webgpu.ts
@@ -30,9 +30,17 @@ export class WebGpuBackend implements Backend {
Logger.setWithEnv(env);

Logger.verbose('WebGpuBackend', 'Initialized successfully.');

this.device.onuncapturederror = ev => {
if (ev.error instanceof GPUValidationError) {
// eslint-disable-next-line no-console
console.error(`An uncaught WebGPU validation error was raised: ${ev.error.message}`);
}
};

return true;
} catch (e) {
- Logger.warning('WebGpuBackend', `Unable to initialize WebGLBackend. ${e}`);
+ Logger.warning('WebGpuBackend', `Unable to initialize WebGpuBackend. ${e}`);
return false;
}
}
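
The uncapturederror hook is where WebGPU reports asynchronous validation failures that no synchronous try/catch will ever see. A standalone sketch of the same pattern follows (an illustration only, assuming a browser with WebGPU enabled and the @webgpu/types typings; the deliberately invalid createBuffer call is made up to trigger the handler):

async function demoUncapturedError(): Promise<void> {
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) {
    throw new Error('WebGPU is not available');
  }
  const device = await adapter.requestDevice();

  // Same shape as the handler registered above.
  device.onuncapturederror = ev => {
    if (ev.error instanceof GPUValidationError) {
      // eslint-disable-next-line no-console
      console.error(`Uncaught WebGPU validation error: ${ev.error.message}`);
    }
  };

  // Deliberately invalid: MAP_READ may only be combined with COPY_DST, so this
  // raises a validation error that reaches the handler instead of vanishing.
  // eslint-disable-next-line no-bitwise
  device.createBuffer({size: 16, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.STORAGE});
}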
126 changes: 126 additions & 0 deletions js/web/lib/onnxjs/backends/webgpu/gpu-data-manager.ts
@@ -0,0 +1,126 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import {Guid} from 'guid-typescript';
import {sizeof, Tensor} from '../../tensor';
import {ShapeUtil} from '../../util';
import {GpuData, GpuDataId, GpuDataType} from './types';

/**
 * Manages the mapping from GpuDataId to GPUBuffer: uploading, creating,
 * downloading and releasing GPU data.
 */
export interface GpuDataManager {
uploadData(tensor: Tensor, gpuDataType: GpuDataType): GpuData;
createData(type: Tensor.DataType, dims: readonly number[], gpuDataType: GpuDataType): GpuData;
releaseData(tensorId: Tensor.Id): void;
downloadData(tensorId: Tensor.Id): Promise<ArrayBufferLike>;
}

interface DefaultCacheValue {
gpuData: GpuData;
size: number;
}

interface DownloadCacheValue {
gpuData: GpuData;
data: Promise<ArrayBufferLike>;
}

class GpuDataManagerImpl implements GpuDataManager {
defaultCache: Map<GpuDataId, DefaultCacheValue>;
downloadCache: Map<GpuDataId, DownloadCacheValue>;
constructor(private device: GPUDevice) {
this.defaultCache = new Map();
this.downloadCache = new Map();
}

uploadData(tensor: Tensor, gpuDataType: GpuDataType): GpuData {
if (gpuDataType !== GpuDataType.default) {
throw new Error('we only support default GPU data type now');
}

const cachedData = this.defaultCache.get(tensor.dataId);
if (cachedData) {
return cachedData.gpuData;
}

const src = tensor.numberData;
const srcArrayBuffer = src.buffer;
const srcOffset = src.byteOffset;
const srcLength = src.byteLength;

// create gpu buffer
const gpuBuffer =
this.device.createBuffer({mappedAtCreation: true, size: srcLength, usage: GPUBufferUsage.STORAGE});

// copy (upload) data
const arrayBuffer = gpuBuffer.getMappedRange();
new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));
gpuBuffer.unmap();

const gpuData = {id: tensor.dataId, type: GpuDataType.default, buffer: gpuBuffer};
this.defaultCache.set(gpuData.id, {gpuData, size: srcLength});
return gpuData;
}

createData(type: Tensor.DataType, dims: readonly number[], gpuDataType: GpuDataType): GpuData {
if (gpuDataType !== GpuDataType.default) {
throw new Error('we only support default GPU data type now');
}

// !!!
// !!! IMPORTANT: TODO: figure out whether to reuse storage buffers or always create new ones.
// !!! This needs to be decided based on performance test results.
// !!!

const elemCount = ShapeUtil.size(dims);
const bufferLength = sizeof(type) * elemCount;

// create gpu buffer
const gpuBuffer =
// eslint-disable-next-line no-bitwise
this.device.createBuffer({size: bufferLength, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC});

const gpuData = {id: Guid.create(), type: GpuDataType.default, buffer: gpuBuffer};
this.defaultCache.set(gpuData.id, {gpuData, size: bufferLength});
return gpuData;
}

releaseData(tensorId: Tensor.Id): void {
const cachedData = this.defaultCache.get(tensorId);
if (!cachedData) {
throw new Error('releasing data that does not exist');
}

this.defaultCache.delete(tensorId);
cachedData.gpuData.buffer.destroy();
}

async downloadData(tensorId: Tensor.Id): Promise<ArrayBufferLike> {
const downloadData = this.downloadCache.get(tensorId);
if (downloadData) {
return downloadData.data;
}

const cachedData = this.defaultCache.get(tensorId);
if (!cachedData) {
throw new Error('data does not exist');
}

const commandEncoder = this.device.createCommandEncoder();
const gpuReadBuffer =
// eslint-disable-next-line no-bitwise
this.device.createBuffer({size: cachedData.size, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ});
commandEncoder.copyBufferToBuffer(
cachedData.gpuData.buffer /* source buffer */, 0 /* source offset */, gpuReadBuffer /* destination buffer */,
0 /* destination offset */, cachedData.size /* size */
);
const gpuCommands = commandEncoder.finish();
this.device.queue.submit([gpuCommands]);

await gpuReadBuffer.mapAsync(GPUMapMode.READ);
return gpuReadBuffer.getMappedRange();
}
}

export const createGpuDataManager = (device: GPUDevice): GpuDataManager => new GpuDataManagerImpl(device);
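
For orientation, a hypothetical round trip through this manager; the device and tensor (a float32 onnxjs Tensor) are assumed to come from elsewhere, and this is a sketch rather than code from the commit:

async function demoRoundTrip(device: GPUDevice, tensor: Tensor): Promise<Float32Array> {
  const manager = createGpuDataManager(device);

  // uploadData copies the tensor's CPU data into a mapped-at-creation STORAGE
  // buffer and caches it under the tensor's dataId.
  manager.uploadData(tensor, GpuDataType.default);

  // downloadData stages the STORAGE buffer into a COPY_DST | MAP_READ buffer,
  // submits the copy, and resolves once the read buffer is mapped.
  const raw = await manager.downloadData(tensor.dataId);
  const values = new Float32Array(raw);

  // releaseData evicts the cache entry and destroys the underlying GPUBuffer.
  manager.releaseData(tensor.dataId);
  return values;
}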
75 changes: 74 additions & 1 deletion js/web/lib/onnxjs/backends/webgpu/inference-handler.ts
@@ -2,12 +2,85 @@
// Licensed under the MIT License.

import {InferenceHandler} from '../../backend';
import {createView, Tensor} from '../../tensor';

import {createGpuDataManager, GpuDataManager} from './gpu-data-manager';
import {WebGpuSessionHandler} from './session-handler';
import {GpuData, GpuDataType, ProgramInfo, ProgramInfoLoader} from './types';

const getProgramInfoUniqueKey = (programInfo: ProgramInfo|ProgramInfoLoader, inputGpuDatas: GpuData[]): string => {
const inputs = inputGpuDatas.map(data => `${data.id}`).join('_');
let key = programInfo.name;
if (programInfo.cacheHint) {
key += '[' + programInfo.cacheHint + ']';
}
key += ':' + inputs;
return key;
};

export class WebGpuInferenceHandler implements InferenceHandler {
dataManager: GpuDataManager;
constructor(public session: WebGpuSessionHandler) {
// TODO:
this.dataManager = createGpuDataManager(session.backend.device);
}

private uploadGpuData(tensor: Tensor, gpuDataType: GpuDataType): GpuData {
if (this.session.isInitializer(tensor.dataId)) {
return this.session.dataManager.uploadData(tensor, gpuDataType);
}

return this.dataManager.uploadData(tensor, gpuDataType);
}

private createGpuData(type: Tensor.DataType, dims: readonly number[], gpuDataType: GpuDataType): GpuData {
return this.dataManager.createData(type, dims, gpuDataType);
}

run(program: ProgramInfoLoader|ProgramInfo, inputs: readonly Tensor[]): Tensor[] {
if (inputs.length !== program.inputTypes.length) {
throw new Error(`Input size must be equal to ${program.inputTypes.length}.`);
}

// create GPU data for inputs
const inputDatas: GpuData[] = [];
for (let i = 0; i < program.inputTypes.length; ++i) {
inputDatas[i] = this.uploadGpuData(inputs[i], program.inputTypes[i]);
}

const key = getProgramInfoUniqueKey(program, inputDatas);
let artifact = this.session.programManager.getArtifact(key);
const programInfo = artifact ?
artifact.programInfo :
(typeof (program as ProgramInfoLoader).get === 'function' ? (program as ProgramInfoLoader).get() :
(program as ProgramInfo));

// create GPU data for outputs
const outputDatas: GpuData[] = [];
for (let i = 0; i < programInfo.outputs.length; ++i) {
outputDatas.push(this.createGpuData(
programInfo.outputs[i].type, programInfo.outputs[i].dims, programInfo.outputs[i].gpuDataType));
}

if (!artifact) {
artifact = this.session.programManager.build(programInfo);
this.session.programManager.setArtifact(key, artifact);
}

this.session.programManager.run(artifact, inputDatas, outputDatas, artifact.programInfo.dispatchGroup(inputs));

const outputTensors: Tensor[] = [];
for (let i = 0; i < outputDatas.length; i++) {
const outputTensorInfo = artifact.programInfo.outputs[i];
const dims = outputTensorInfo.dims;
const type = outputTensorInfo.type;
const outputData = outputDatas[i];
const tensor = new Tensor(dims, type, undefined, async () => {
const data = await this.dataManager.downloadData(outputData.id);
return createView(data, type);
}, undefined, outputData.id);
outputTensors.push(tensor);
}
return outputTensors;
}

dispose(): void {}
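
The cache key produced by getProgramInfoUniqueKey is what makes artifact reuse work across run() calls. The helper is module-private, so the following is a sketch of its output with invented ids, not a public API:

// name 'Abs', no cacheHint, inputs whose GpuDataIds are 3 and 7:
//   'Abs:3_7'
// name 'Abs', cacheHint 'f32', same inputs:
//   'Abs[f32]:3_7'
//
// Re-running over the same uploaded tensors reuses their dataIds, so the key
// matches and the compiled pipeline is fetched from the program manager's
// cache; fresh inputs produce new ids, a new key, and a new compilation.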
3 changes: 3 additions & 0 deletions js/web/lib/onnxjs/backends/webgpu/op-resolve-rules.ts
@@ -3,7 +3,10 @@

import {OpSet} from '../../opset';

import * as unaryOps from './ops/unary-op';

export const WEBGPU_OP_RESOLVE_RULES: readonly OpSet.ResolveRule[] = [
['Abs', '', '6+', unaryOps.abs]
// ['Abs', '', '6+', unaryOps.abs],
// ['Acos', '', '7+', unaryOps.acos],
// ['Add', '', '7+', binaryOps.add],
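
For reference, a ResolveRule tuple (as consumed by resolveOperator elsewhere in onnxjs) reads as op type, op domain, supported opset versions, and kernel implementation, so the newly enabled entry means:

// ['Abs', '', '6+', unaryOps.abs]
//   op type 'Abs', default domain (''), opset version 6 and above,
//   resolved to the unaryOps.abs kernel added in this commit.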
40 changes: 40 additions & 0 deletions js/web/lib/onnxjs/backends/webgpu/ops/unary-op.ts
@@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import {Tensor} from '../../../tensor';
import {WebGpuInferenceHandler} from '../inference-handler';
import {GpuDataType} from '../types';

export const abs = (handler: WebGpuInferenceHandler, inputs: Tensor[]): Tensor[] => handler.run(
{
name: 'Abs',
inputTypes: [GpuDataType.default],
// inputLayouts: [],
// outputLayouts: [],
shaderSource: `
@group(0) @binding(0) var<storage, read> inputData : array<f32>;
@group(0) @binding(1) var<storage, write> outputData : array<f32>;
@stage(compute) @workgroup_size(32)
fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
// Guard against out-of-bounds work group sizes
if (global_id.x * 32u >= ${inputs[0].size}u) {
return;
}
//
// TODO: SIMD?
//
let start = global_id.x * 32u;
let end = select(start + 32u, ${inputs[0].size}u, start + 32u > ${inputs[0].size}u);
for (var i = start; i < end; i = i + 1u) {
outputData[i] = abs(inputData[i]);
}
}`,
outputs: [{dims: inputs[0].dims, type: inputs[0].type, gpuDataType: GpuDataType.default}],
// entryPoint: 'main',
dispatchGroup: (inputTensors) => ({x: Math.ceil(inputTensors[0].size / 32)})
},
inputs);
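
The Abs kernel doubles as a template: each invocation owns a contiguous 32-element strip, the leading guard drops invocations past the end of the data, and select() clamps the final strip. A sketch of how further unary operators could share it through a hypothetical helper (createUnaryOp is not part of this commit):

const createUnaryOp = (name: string, expr: (x: string) => string) =>
    (handler: WebGpuInferenceHandler, inputs: Tensor[]): Tensor[] => handler.run(
        {
          name,
          inputTypes: [GpuDataType.default],
          shaderSource: `
  @group(0) @binding(0) var<storage, read> inputData : array<f32>;
  @group(0) @binding(1) var<storage, write> outputData : array<f32>;

  @stage(compute) @workgroup_size(32)
  fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
    // Guard against out-of-bounds work group sizes
    if (global_id.x * 32u >= ${inputs[0].size}u) {
      return;
    }
    let start = global_id.x * 32u;
    let end = select(start + 32u, ${inputs[0].size}u, start + 32u > ${inputs[0].size}u);
    for (var i = start; i < end; i = i + 1u) {
      outputData[i] = ${expr('inputData[i]')};
    }
  }`,
          outputs: [{dims: inputs[0].dims, type: inputs[0].type, gpuDataType: GpuDataType.default}],
          dispatchGroup: (inputTensors) => ({x: Math.ceil(inputTensors[0].size / 32)})
        },
        inputs);

// e.g. export const neg = createUnaryOp('Neg', x => `-(${x})`);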
70 changes: 70 additions & 0 deletions js/web/lib/onnxjs/backends/webgpu/program-manager.ts
@@ -0,0 +1,70 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import {Profiler} from '../../instrument';

import {Artifact, GpuData, ProgramInfo} from './types';

/**
 * ProgramManager is the main class behind running computations.
 * It builds ProgramInfo's into Artifacts: the given WGSL shader source is
 * compiled into a GPUComputePipeline (cached as an Artifact), and the
 * artifact is used to run a computation by encoding a compute pass that
 * binds the input and output GPU buffers to their corresponding bind group
 * entries and dispatches it on the device queue.
 */
export class ProgramManager {
repo: Map<unknown, Artifact>; // this should be a per-session object
attributesBound: boolean;

constructor(private device: GPUDevice, public profiler: Readonly<Profiler>) {
this.repo = new Map();
this.attributesBound = false;
}
getArtifact(key: unknown): Artifact|undefined {
return this.repo.get(key);
}
setArtifact(key: unknown, artifact: Artifact): void {
this.repo.set(key, artifact);
}
run(buildArtifact: Artifact, inputs: GpuData[], outputs: GpuData[],
dispatchGroup: {x: number; y?: number; z?: number}): void {
const device = this.device;

// TODO: should we create command encoder every time?

const commandEncoder = device.createCommandEncoder();

const passEncoder = commandEncoder.beginComputePass();
passEncoder.setPipeline(buildArtifact.computePipeline);
const entries = [];
for (const input of inputs) {
entries.push({binding: entries.length, resource: {buffer: input.buffer}});
}
for (const output of outputs) {
entries.push({binding: entries.length, resource: {buffer: output.buffer}});
}
const bindGroup = device.createBindGroup({layout: buildArtifact.computePipeline.getBindGroupLayout(0), entries});
passEncoder.setBindGroup(0, bindGroup);

const {x, y, z} = dispatchGroup;
passEncoder.dispatch(x, y, z);

passEncoder.endPass();

device.queue.submit([commandEncoder.finish()]);
}
dispose(): void {
// this.repo.forEach(a => this.glContext.deleteProgram(a.program));
}
build(programInfo: ProgramInfo): Artifact {
const device = this.device;

const shaderModule = device.createShaderModule({code: programInfo.shaderSource});

const computePipeline = device.createComputePipeline({compute: {module: shaderModule, entryPoint: 'main'}});

return {programInfo, computePipeline};
}
}
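
Tying it together, the inference handler drives this class in a compile-once, run-many pattern. A sketch (key, programInfo, inputData, outputData and size are assumed to come from the handler's run() path shown earlier):

function demoProgramManagerFlow(
    device: GPUDevice, profiler: Readonly<Profiler>, key: string, programInfo: ProgramInfo,
    inputData: GpuData, outputData: GpuData, size: number): void {
  const programManager = new ProgramManager(device, profiler);

  let artifact = programManager.getArtifact(key);
  if (!artifact) {
    // build() compiles the WGSL source into a GPUComputePipeline exactly once
    // per cache key.
    artifact = programManager.build(programInfo);
    programManager.setArtifact(key, artifact);
  }

  // run() encodes a compute pass that binds inputs, then outputs, in order to
  // bindings 0..n-1 of bind group 0, dispatches, and submits to the queue.
  programManager.run(artifact, [inputData], [outputData], {x: Math.ceil(size / 32)});
}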
9 changes: 7 additions & 2 deletions js/web/lib/onnxjs/backends/webgpu/session-handler.ts
@@ -8,15 +8,20 @@ import {OpSet, resolveOperator} from '../../opset';
import {Session} from '../../session';
import {Tensor} from '../../tensor';
import {WebGpuBackend} from '../backend-webgpu';

import {createGpuDataManager, GpuDataManager} from './gpu-data-manager';
import {WebGpuInferenceHandler} from './inference-handler';
import {WEBGPU_OP_RESOLVE_RULES} from './op-resolve-rules';
import {ProgramManager} from './program-manager';

export class WebGpuSessionHandler implements SessionHandler {
private initializers: Set<Tensor.Id>;
readonly dataManager: GpuDataManager;
programManager: ProgramManager;

constructor(public readonly backend: WebGpuBackend, public readonly context: Session.Context) {
// TODO
this.dataManager = createGpuDataManager(this.backend.device);
this.programManager = new ProgramManager(this.backend.device, this.context.profiler);
}

createInferenceHandler() {