Skip to content

Commit

Permalink
[webgpu] Create tensor from GPUBuffer (#7034)
Browse files Browse the repository at this point in the history
Fix: #6232
  • Loading branch information
axinging authored Nov 23, 2022
1 parent 0906427 commit 775619c
Show file tree
Hide file tree
Showing 11 changed files with 406 additions and 46 deletions.
5 changes: 3 additions & 2 deletions tfjs-backend-webgl/src/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1304,8 +1304,9 @@ export class MathBackendWebGL extends KernelBackend {
* Create a TF.js tensor out of an existing WebGL texture. A new texture will
* be created.
*/
override createTensorFromTexture(values: WebGLData, shape: number[],
dtype: DataType): Tensor {
override createTensorFromGPUData(
values: WebGLData, shape: number[], dtype: DataType): Tensor {
values.channels = values.channels || 'RGBA';
const {texture, height, width, channels} = values;
const backend = engine().backend as MathBackendWebGL;

Expand Down
65 changes: 60 additions & 5 deletions tfjs-backend-webgpu/src/backend_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import './flags_webgpu';

import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, Tensor, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util, WebGPUData} from '@tensorflow/tfjs-core';

import {AdapterInfo} from './adapter_info';
import {BufferManager} from './buffer_manager';
Expand Down Expand Up @@ -51,6 +51,9 @@ type TensorData = {
shape: number[],
refCount: number,
resourceInfo?: BufferInfo|TextureInfo,
// If external is true, we use the resource provided by the user directly
// (without a copy), so the user is responsible for its release.
external?: boolean,
// For complex numbers, the real and imaginary parts are stored as their own
// individual tensors, with a parent joining the two with the
// complexTensorInfos field.
Expand Down Expand Up @@ -242,6 +245,11 @@ export class WebGPUBackend extends KernelBackend {
if (!tensorData || !tensorData.resourceInfo) {
return;
}
// If the tensor's resource comes from an external source, do not release it.
if (tensorData.external) {
tensorData.resourceInfo = null;
return;
}
if ('texture' in tensorData.resourceInfo) {
const textureInfo = tensorData.resourceInfo;
if (textureInfo.texture instanceof GPUTexture) {
Expand Down Expand Up @@ -282,7 +290,8 @@ export class WebGPUBackend extends KernelBackend {
}
}

override write(values: backend_util.BackendValues, shape: number[],
override write(
values: backend_util.BackendValues, shape: number[],
dtype: DataType): DataId {
if (dtype === 'complex64' && values != null) {
throw new Error(
Expand Down Expand Up @@ -437,6 +446,53 @@ export class WebGPUBackend extends KernelBackend {
return vals;
}

// Copies `size` bytes out of `srcBuffer` into a newly acquired buffer that
// shares the same size and usage flags, and submits the copy right away.
private copyBuffer(srcBuffer: GPUBuffer, size: number, usage: number) {
  const copy = this.bufferManager.acquireBuffer(size, usage);
  this.ensureCommandEncoderReady();
  this.ensureComputePassEnded();
  this.currentCommandEncoder.copyBufferToBuffer(srcBuffer, 0, copy, 0, size);
  this.submitQueue();
  return copy;
}

/**
 * Create a TF.js tensor out of an existing WebGPU buffer.
 *
 * When `values.zeroCopy` is true, the user's buffer is used directly and the
 * user stays responsible for releasing it; otherwise the data is copied into
 * a backend-owned buffer, so the user's buffer may be destroyed afterwards.
 */
override createTensorFromGPUData(
    values: WebGPUData, shape: number[], dtype: DataType): Tensor {
  let buffer = values.buffer;
  if (dtype === 'complex64') {
    throw new Error(`Cannot write to a complex64 dtype. `);
  }
  // Validate the buffer BEFORE registering anything in the tensor map, so a
  // failing call does not leave a dangling entry behind.
  const size =
      webgpu_util.GPUBytesPerElement(dtype) * util.sizeFromShape(shape);
  if (values.buffer.size < size) {
    throw new Error(`GPUBuffer size(${
        values.buffer.size}) is smaller than tensor size(${size})!`);
  } else if (
      (values.buffer.usage &
       (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !==
      (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) {
    throw new Error(
        'GPUBuffer.usage should include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC!');
  }

  const dataId = {id: this.nextDataId()};
  this.tensorMap.set(
      dataId,
      {dtype, shape, values: null, refCount: 1, external: values.zeroCopy});
  const tensorData = this.tensorMap.get(dataId);

  // Do buffer copy by default; zero-copy wraps the user's buffer directly.
  if (values.zeroCopy !== true) {
    buffer = this.copyBuffer(buffer, size, buffer.usage);
  }
  tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
  return engine().makeTensorFromDataId(dataId, shape, dtype, this);
}

/**
* Read tensor to a new GPUBuffer.
* @param dataId The source tensor.
Expand Down Expand Up @@ -623,9 +679,8 @@ export class WebGPUBackend extends KernelBackend {
// TODO: WebGPU doesn't support read data synchronously from GPU to CPU.
// So it will report error when switching backend from WebGPU to others.
// There are two situations: 1) switching the backend after running a
// model; 2) switching the backend within the model. Temporarily keep the
// values on CPU to solve the first issue.
// tensorData.values = null;
// model; 2) switching the backend within the model. Temporarily keep
// the values on CPU to solve the first issue. tensorData.values = null;
}
}

Expand Down
204 changes: 204 additions & 0 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,207 @@ describeWebGPU('keeping data on gpu ', () => {
expect(endDataBuckets).toEqual(startDataBuckets + 1);
});
});

// Creates a mappable staging GPUBuffer (MAP_WRITE | COPY_SRC) and fills it
// with `data` interpreted as 4-byte elements of the given dtype.
// Throws for dtypes other than 'float32' and 'int32'.
function createStagingGPUBufferFromData(
    device: GPUDevice, data: number[], dtype: tf.DataType) {
  const bytesPerElement = 4;
  const sizeInBytes = data.length * bytesPerElement;

  const gpuWriteBuffer = device.createBuffer({
    mappedAtCreation: true,
    size: sizeInBytes,
    usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
  });
  const arrayBuffer = gpuWriteBuffer.getMappedRange();
  if (dtype === 'float32') {
    new Float32Array(arrayBuffer).set(data);
  } else if (dtype === 'int32') {
    new Int32Array(arrayBuffer).set(data);
  } else {
    // Fixed: the two concatenated fragments previously lacked a separating
    // space, yielding "supports'float32'" in the message.
    throw new Error(
        `Creating tensor from GPUBuffer only supports ` +
        `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
  }
  gpuWriteBuffer.unmap();
  return gpuWriteBuffer;
}

// Builds a device-local GPUBuffer holding `data` by staging the values in a
// mappable buffer and copying them over. The staging buffer is destroyed
// once the copy is submitted.
function createGPUBufferFromData(
    device: GPUDevice, data: number[], dtype: tf.DataType,
    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
        GPUBufferUsage.COPY_SRC) {
  const byteLength = data.length * 4;

  const staging = createStagingGPUBufferFromData(device, data, dtype);
  const deviceBuffer = device.createBuffer(
      {mappedAtCreation: false, size: byteLength, usage: bufferUsage});

  const encoder = device.createCommandEncoder();
  encoder.copyBufferToBuffer(staging, 0, deviceBuffer, 0, byteLength);
  device.queue.submit([encoder.finish()]);
  staging.destroy();
  return deviceBuffer;
}

// Round-trips `aData` through a GPUBuffer-backed tensor, runs tf.add on it
// and checks both the numeric result and that no tensors/bytes leak.
// Destroy timing matters: with a copy (zeroCopy false) the source buffer can
// be destroyed right after tensor creation; with zeroCopy the backend wraps
// the buffer directly, so it must outlive the tensors.
async function testCreateTensorFromGPUBuffer(
    dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) {
  const webGPUBackend = tf.backend() as WebGPUBackend;
  const device = webGPUBackend.device;
  const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
  const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
  const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
  const aBuffer = createGPUBufferFromData(device, aData, dtype);
  const shape: number[] = [aData.length];
  const startNumBytes = tf.memory().numBytes;
  const startNumTensors = tf.memory().numTensors;
  const webGPUData = {buffer: aBuffer, zeroCopy};
  const a = useDefaultShapeAndType ? tf.tensor(webGPUData) :
                                     tf.tensor(webGPUData, shape, dtype);
  // Copy mode: the backend no longer needs the source buffer.
  if (zeroCopy !== true) {
    aBuffer.destroy();
  }
  const b = tf.tensor(bData, shape, dtype);
  const result = tf.add(a, b);
  tf.test_util.expectArraysClose(await result.data(), expected);
  a.dispose();
  b.dispose();
  result.dispose();
  const endNumBytes = tf.memory().numBytes;
  const endNumTensors = tf.memory().numTensors;
  // No net allocation should remain after disposing all tensors.
  expect(endNumBytes - startNumBytes).toEqual(0);
  expect(endNumTensors - startNumTensors).toEqual(0);
  // Zero-copy mode: only safe to destroy once all tensors are disposed.
  if (zeroCopy === true) {
    aBuffer.destroy();
  }
}

// Registers the create-tensor-from-GPUBuffer test cases. `zeroCopy` selects
// whether the backend wraps the user's buffer directly (true) or copies it
// (false); the buffer-destroy timing in each case depends on that mode.
function createTensorFromGPUTest(zeroCopy = false) {
  it('use default shape and data type(float32)', async () => {
    await testCreateTensorFromGPUBuffer('float32', true, zeroCopy);
  });

  it('work for float32', async () => {
    await testCreateTensorFromGPUBuffer('float32', false, zeroCopy);
  });

  it('work for int32', async () => {
    await testCreateTensorFromGPUBuffer('int32', false, zeroCopy);
  });

  it('work for read', async () => {
    const webGPUBackend = tf.backend() as WebGPUBackend;
    const device = webGPUBackend.device;
    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    const dtype = 'float32';
    const aBuffer = createGPUBufferFromData(device, aData, dtype);
    const shape: number[] = [aData.length];
    const a = tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
    // In copy mode the source buffer may be destroyed immediately.
    if (zeroCopy !== true) {
      aBuffer.destroy();
    }
    // Reading back must work in both modes.
    await a.data();
    // In zero-copy mode the buffer must outlive the read.
    if (zeroCopy === true) {
      aBuffer.destroy();
    }
  });

  it('two tensors share the same GPUBuffer', async () => {
    const webGPUBackend = tf.backend() as WebGPUBackend;
    const device = webGPUBackend.device;
    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    const dtype = 'float32';
    const aBuffer = createGPUBufferFromData(device, aData, dtype);
    const startNumBytes = tf.memory().numBytes;
    const startNumTensors = tf.memory().numTensors;
    const shape: number[] = [aData.length];
    const webGPUData = {buffer: aBuffer, zeroCopy};
    // Both tensors are created from the same underlying buffer.
    const a = tf.tensor(webGPUData, shape, dtype);
    const b = tf.tensor(webGPUData, shape, dtype);
    if (zeroCopy !== true) {
      aBuffer.destroy();
    }
    const result = tf.add(a, b);
    const expected =
        [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32];
    tf.test_util.expectArraysClose(await result.data(), expected);
    a.dispose();
    b.dispose();
    result.dispose();
    const endNumBytes = tf.memory().numBytes;
    const endNumTensors = tf.memory().numTensors;
    // No net allocation should remain after disposing all tensors.
    expect(endNumBytes - startNumBytes).toEqual(0);
    expect(endNumTensors - startNumTensors).toEqual(0);
    if (zeroCopy === true) {
      aBuffer.destroy();
    }
  });

  it('GPUBuffer size is bigger than tensor size', async () => {
    const webGPUBackend = tf.backend() as WebGPUBackend;
    const device = webGPUBackend.device;
    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    const dtype = 'float32';
    const aBuffer = createGPUBufferFromData(device, aData, dtype);
    const startNumBytes = tf.memory().numBytes;
    const startNumTensors = tf.memory().numTensors;
    // GPUBuffer.size is bigger than shape size; only the first
    // aData.length - 1 elements are used by the tensors.
    const shape: number[] = [aData.length - 1];
    const webGPUData = {buffer: aBuffer, zeroCopy};
    const a = tf.tensor(webGPUData, shape, dtype);
    const b = tf.tensor(webGPUData, shape, dtype);
    if (zeroCopy !== true) {
      aBuffer.destroy();
    }
    const result = tf.add(a, b);
    const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
    tf.test_util.expectArraysClose(await result.data(), expected);
    a.dispose();
    b.dispose();
    result.dispose();
    const endNumBytes = tf.memory().numBytes;
    const endNumTensors = tf.memory().numTensors;
    expect(endNumBytes - startNumBytes).toEqual(0);
    expect(endNumTensors - startNumTensors).toEqual(0);
    if (zeroCopy === true) {
      aBuffer.destroy();
    }
  });

  it('throw when GPUBuffer size is smaller than tensor size', async () => {
    const webGPUBackend = tf.backend() as WebGPUBackend;
    const device = webGPUBackend.device;
    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    const dtype = 'float32';
    const aBuffer = createGPUBufferFromData(device, aData, dtype);
    // Throw when GPUBuffer.size is smaller than shape size
    const shape: number[] = [aData.length + 1];
    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
    expect(a).toThrowError();
    aBuffer.destroy();
  });

  it('throw when GPUBuffer usage is not correct', async () => {
    const webGPUBackend = tf.backend() as WebGPUBackend;
    const device = webGPUBackend.device;
    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    const dtype = 'float32';
    // Create a GPUBuffer without GPUBufferUsage.STORAGE.
    const aBuffer = createStagingGPUBufferFromData(device, aData, dtype);
    // Throw when GPUBuffer usage is not correct.
    const shape: number[] = [aData.length];
    const a = () => tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
    expect(a).toThrowError();
    aBuffer.destroy();
  });
}

// Run the shared suite in both copy (default) and zero-copy modes.
describeWebGPU('create tensor from GPUBuffer', () => {
  createTensorFromGPUTest();
});

describeWebGPU('create tensor from GPUBuffer with zero copy', () => {
  createTensorFromGPUTest(true);
});
3 changes: 1 addition & 2 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ ENV.registerFlag('WEBGPU_USE_NAIVE_CONV2D_DEBUG', () => false);
* are dispatched, it means the hardware may be in low occupancy.
* 0 means it's not set by the user. A default strategy will be applied.
*/
ENV.registerFlag(
'WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);

/**
* Whether we will run im2col as a separate shader for convolution.
Expand Down
10 changes: 6 additions & 4 deletions tfjs-core/src/backends/backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import {Backend, DataToGPUOptions, GPUData, Tensor} from '../tensor';
import {DataId} from '../tensor_info';
import {BackendValues, DataType, WebGLData} from '../types';
import {BackendValues, DataType, WebGLData, WebGPUData} from '../types';

export const EPSILON_FLOAT32 = 1e-7;
export const EPSILON_FLOAT16 = 1e-4;
Expand Down Expand Up @@ -133,10 +133,12 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer {
refCount: number): void {
return notYetImplemented('move');
}
createTensorFromTexture(values: WebGLData, shape: number[], dtype: DataType):
Tensor {
return notYetImplemented('createTensorFromTexture');

createTensorFromGPUData(
values: WebGLData|WebGPUData, shape: number[], dtype: DataType): Tensor {
return notYetImplemented('createTensorFromGPUData');
}

memory(): {unreliable: boolean; reasons?: string[]} {
return notYetImplemented('memory');
}
Expand Down
2 changes: 1 addition & 1 deletion tfjs-core/src/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export {RMSPropOptimizer} from './optimizers/rmsprop_optimizer';
export {SGDOptimizer} from './optimizers/sgd_optimizer';
export {DataToGPUOptions, DataToGPUWebGLOption, GPUData, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, Variable} from './tensor';
export {GradSaveFunc, NamedTensorMap, TensorContainer, TensorContainerArray, TensorContainerObject} from './tensor_types';
export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData} from './types';
export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData, WebGPUData} from './types';

export * from './ops/ops';
export {Reduction} from './ops/loss_ops_utils';
Expand Down
Loading

0 comments on commit 775619c

Please sign in to comment.