webgpu: Support AdapterInfo (#6862)
Support sequential access mode
Use sequential access mode for Intel devices
qjia7 authored Sep 30, 2022
1 parent 4e122a3 commit d94faba
Showing 7 changed files with 191 additions and 75 deletions.
37 changes: 37 additions & 0 deletions tfjs-backend-webgpu/src/adapter_info.ts
@@ -0,0 +1,37 @@
/**
* @license
* Copyright 2022 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

// TODO: Remove it once webgpu/types is successfully upgraded.
// https://github.com/tensorflow/tfjs/issues/6869
export interface GPUAdapterInfo {
  vendor: string;
  architecture: string;
}

export class AdapterInfo {
  private vendor: string;

  constructor(adapterInfo: GPUAdapterInfo) {
    if (adapterInfo) {
      this.vendor = adapterInfo.vendor;
    }
  }

  isIntel(): boolean {
    return this.vendor === 'intel';
  }
}
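For context, a minimal usage sketch, not part of this commit (pickVendorTunedMode is a hypothetical helper): construct AdapterInfo once from the adapter's reported info and branch on the vendor, mirroring what the backend does below.

// Hypothetical usage sketch. Assumes the installed @webgpu/types does not
// yet expose requestAdapterInfo(), hence the cast (as in base.ts below).
import {AdapterInfo} from './adapter_info';

async function pickVendorTunedMode(): Promise<boolean> {
  const adapter = await navigator.gpu.requestAdapter();
  if (adapter == null) {
    return false;  // No WebGPU adapter available.
  }
  // tslint:disable-next-line:no-any
  const rawInfo = await (adapter as any).requestAdapterInfo();
  const adapterInfo = new AdapterInfo(rawInfo);
  // The commit turns on sequential access by threads only for Intel GPUs.
  return adapterInfo.isIntel();
}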
5 changes: 4 additions & 1 deletion tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -19,6 +19,7 @@ import './flags_webgpu';

import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';

import {AdapterInfo, GPUAdapterInfo} from './adapter_info';
import {BufferManager} from './buffer_manager';
import {TextureManager} from './texture_manager';
import * as webgpu_program from './webgpu_program';
@@ -107,6 +108,7 @@ const reshapeDispatch =

export class WebGPUBackend extends KernelBackend {
  bufferManager: BufferManager;
  adapterInfo: AdapterInfo;
  device: GPUDevice;
  queue: GPUQueue;
  tensorMap: DataStorage<TensorData>;
@@ -135,7 +137,7 @@ export class WebGPUBackend extends KernelBackend {
    return WebGPUBackend.nextDataId++;
  }

  constructor(device: GPUDevice) {
  constructor(device: GPUDevice, adapterInfo?: GPUAdapterInfo) {
    super();
    if (!webgpu_util.isWebGPUSupported()) {
      throw new Error('WebGPU is not supported on this device');
@@ -146,6 +148,7 @@
    this.currentCommandEncoder = null;
    this.currentComputePass = null;
    this.supportTimeQuery = device.features.has('timestamp-query');
    this.adapterInfo = new AdapterInfo(adapterInfo);

    this.bufferManager = new BufferManager(this.device);
    this.textureManager = new TextureManager(this.device);
4 changes: 3 additions & 1 deletion tfjs-backend-webgpu/src/base.ts
@@ -50,7 +50,9 @@ if (isWebGPUSupported()) {
      deviceDescriptor.requiredFeatures = ['timestamp-query'];
    }
    const device: GPUDevice = await adapter.requestDevice(deviceDescriptor);
    return new WebGPUBackend(device);
    // tslint:disable-next-line:no-any
    const adapterInfo = await (adapter as any).requestAdapterInfo();
    return new WebGPUBackend(device, adapterInfo);
  }, 3 /*priority*/);
}
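The as-any cast above exists because GPUAdapterInfo is not yet available from the installed @webgpu/types, which is why adapter_info.ts declares its own interface. A sketch, assuming an upgraded @webgpu/types, of what this call could reduce to once the TODO is resolved:

// Assumed shape after a @webgpu/types upgrade (not part of this commit):
// requestAdapterInfo() would be typed as Promise<GPUAdapterInfo>, so the
// cast and the local GPUAdapterInfo interface could both go away.
const adapterInfo = await adapter.requestAdapterInfo();
return new WebGPUBackend(device, adapterInfo);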

9 changes: 6 additions & 3 deletions tfjs-backend-webgpu/src/conv2d_mm_webgpu.ts
@@ -176,12 +176,13 @@ export class Conv2DMMProgram implements WebGPUProgram {
  tileInner: number;
  innerElementSize: number;
  isVec4?: boolean;
  private sequentialAccessByThreads: boolean;

  constructor(
      convInfo: backend_util.Conv2DInfo, dimAOuter: number, dimBOuter: number,
      dimInner: number, addBias = false,
      activation: backend_util.Activation = null,
      hasPreluActivationWeights = false) {
      hasPreluActivationWeights = false, sequentialAccessByThreads = false) {
    this.outputShape = convInfo.outShape;
    this.isChannelsLast = convInfo.dataFormat === 'channelsLast';
    this.isVec4 =
@@ -229,6 +230,7 @@
      }
    }

    this.sequentialAccessByThreads = sequentialAccessByThreads;
    this.addBias = addBias;
    this.activation = activation;
    this.hasPreluActivationWeights = hasPreluActivationWeights;
@@ -244,7 +246,8 @@

    this.shaderKey = `conv2DMM_${this.elementsPerThread}_${this.activation}}_${
        this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${
        this.innerElementSize}_${this.isChannelsLast}`;
        this.innerElementSize}_${this.isChannelsLast}_${
        this.sequentialAccessByThreads}`;
  }

  getUserCode(): string {
@@ -254,7 +257,7 @@
            this.tileInner) :
        makeMatMulPackedSource(
            this.elementsPerThread, this.workGroupSize, !this.isChannelsLast,
            this.tileInner);
            this.tileInner, false, null, this.sequentialAccessByThreads);
    const elementsSize =
        this.isVec4 ? [this.innerElementSize, 4, 4] : [1, 1, 1];
    const userCode = `
6 changes: 5 additions & 1 deletion tfjs-backend-webgpu/src/kernels/BatchMatMul_impl.ts
@@ -184,9 +184,13 @@ export function batchMatMulImpl({
          activation, preluActivationWeights);
      break;
    case MatMulProgramType.MatMulPackedProgram:
      // Experiments show that sequential access is more friendly for Intel
      // GPUs.
      const sequentialAccessByThreads = backend.adapterInfo.isIntel();
      program = new MatMulPackedProgram(
          a3dShape, outputShape, batchAEqualOne, batchBEqualOne, transposeA,
          transposeB, bias, activation, preluActivationWeights);
          transposeB, bias, activation, preluActivationWeights,
          sequentialAccessByThreads);
      break;
    default:
      throw new Error(`Unsupported MatMulProgramType ${matmulProgramType}.`);
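The comment above carries the rationale for the new flag; Conv2D_impl.ts below passes the same flag to Conv2DMMProgram. A rough TypeScript illustration of the two thread-to-element mappings the flag chooses between, as an assumed sketch of the general idea rather than the WGSL that makeMatMulPackedSource actually emits:

// Assumed illustration: with `threads` invocations in a workgroup, each
// producing `perThread` elements, the flag decides how elements map to threads.
function elementsForThread(
    threadId: number, threads: number, perThread: number,
    sequential: boolean): number[] {
  const indices: number[] = [];
  for (let i = 0; i < perThread; i++) {
    indices.push(
        sequential ?
            // Sequential by threads: one thread walks a contiguous run, so
            // its successive accesses hit adjacent addresses.
            threadId * perThread + i :
            // Strided: adjacent threads hit adjacent addresses on the same
            // iteration, but each thread strides by the workgroup size.
            i * threads + threadId);
  }
  return indices;
}
// e.g. elementsForThread(1, 8, 4, true)  -> [4, 5, 6, 7]
//      elementsForThread(1, 8, 4, false) -> [1, 9, 17, 25]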
4 changes: 3 additions & 1 deletion tfjs-backend-webgpu/src/kernels/Conv2D_impl.ts
@@ -229,9 +229,11 @@ export function conv2DImpl({
        {type: 'int32', data: [dimAOuter]}, {type: 'int32', data: [dimBOuter]},
        {type: 'int32', data: [dimInner]});

    // Experiments show that sequential access is more friendly for Intel GPUs.
    const sequentialAccessByThreads = backend.adapterInfo.isIntel();
    program = new Conv2DMMProgram(
        convInfo, dimAOuter, dimBOuter, dimInner, hasBias, activation,
        hasPreluActivationWeights);
        hasPreluActivationWeights, sequentialAccessByThreads);
  }

  const intermediates: TensorInfo[] = [];