Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[webgpu] Create tensor from GPUBuffer #7034

Merged
merged 9 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tfjs-backend-webgl/src/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1304,8 +1304,9 @@ export class MathBackendWebGL extends KernelBackend {
* Create a TF.js tensor out of an existing WebGL texture. A new texture will
* be created.
*/
override createTensorFromTexture(values: WebGLData, shape: number[],
dtype: DataType): Tensor {
override createTensorFromGPUData(
values: WebGLData, shape: number[], dtype: DataType): Tensor {
values.channels = values.channels || 'RGBA';
const {texture, height, width, channels} = values;
const backend = engine().backend as MathBackendWebGL;

Expand Down
65 changes: 60 additions & 5 deletions tfjs-backend-webgpu/src/backend_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import './flags_webgpu';

import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, Tensor, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util, WebGPUData} from '@tensorflow/tfjs-core';

import {AdapterInfo} from './adapter_info';
import {BufferManager} from './buffer_manager';
Expand Down Expand Up @@ -51,6 +51,9 @@ type TensorData = {
shape: number[],
refCount: number,
resourceInfo?: BufferInfo|TextureInfo,
// When external is true, the resource was provided by the user and is used
// directly (without a copy), so the user is responsible for releasing it.
external?: boolean,
// For complex numbers, the real and imaginary parts are stored as their own
// individual tensors, with a parent joining the two with the
// complexTensorInfos field.
Expand Down Expand Up @@ -242,6 +245,11 @@ export class WebGPUBackend extends KernelBackend {
if (!tensorData || !tensorData.resourceInfo) {
return;
}
// If the tensor's resource is external (user-provided), do not release it.
if (tensorData.external) {
tensorData.resourceInfo = null;
return;
}
if ('texture' in tensorData.resourceInfo) {
const textureInfo = tensorData.resourceInfo;
if (textureInfo.texture instanceof GPUTexture) {
Expand Down Expand Up @@ -282,7 +290,8 @@ export class WebGPUBackend extends KernelBackend {
}
}

override write(values: backend_util.BackendValues, shape: number[],
override write(
values: backend_util.BackendValues, shape: number[],
dtype: DataType): DataId {
if (dtype === 'complex64' && values != null) {
throw new Error(
Expand Down Expand Up @@ -437,6 +446,53 @@ export class WebGPUBackend extends KernelBackend {
return vals;
}

// The source GPUBuffer and destination GPUBuffer have the same size and
// usage.
private copyBuffer(srcBuffer: GPUBuffer, size: number, usage: number) {
const dstBuffer = this.bufferManager.acquireBuffer(size, usage);
this.ensureCommandEncoderReady();
this.ensureComputePassEnded();
this.currentCommandEncoder.copyBufferToBuffer(
srcBuffer, 0, dstBuffer, 0, size);
this.submitQueue();
return dstBuffer;
}

/**
* Create a TF.js tensor out of an existing WebGPU buffer.
*/
override createTensorFromGPUData(
values: WebGPUData, shape: number[], dtype: DataType): Tensor {
let buffer = values.buffer;
if (dtype === 'complex64') {
throw new Error(`Cannot write to a complex64 dtype. `);
}
const dataId = {id: this.nextDataId()};
this.tensorMap.set(
dataId,
{dtype, shape, values: null, refCount: 1, external: values.zeroCopy});
const tensorData = this.tensorMap.get(dataId);
const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) *
util.sizeFromShape(tensorData.shape);
if (values.buffer.size < size) {
throw new Error(`GPUBuffer size(${
values.buffer.size}) is smaller than tensor size(${size})!`);
} else if (
(values.buffer.usage &
(GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !==
(GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) {
throw new Error(
'GPUBuffer.usage should include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC!');
}

// Do buffer copy by default.
if (values.zeroCopy !== true) {
buffer = this.copyBuffer(buffer, size, buffer.usage);
}
tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
return engine().makeTensorFromDataId(dataId, shape, dtype, this);
}

/**
* Read tensor to a new GPUBuffer.
* @param dataId The source tensor.
Expand Down Expand Up @@ -623,9 +679,8 @@ export class WebGPUBackend extends KernelBackend {
// TODO: WebGPU doesn't support reading data synchronously from GPU to CPU.
// So it will report an error when switching backend from WebGPU to others.
// There are two situations: 1) switching the backend after running a
// model; 2) switching the backend within the model. Temporarily keep the
// values on CPU to solve the first issue.
// tensorData.values = null;
// model; 2) switching the backend within the model. Temporarily keep
// the values on CPU to solve the first issue.
// tensorData.values = null;
}
}

Expand Down
204 changes: 204 additions & 0 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,207 @@ describeWebGPU('keeping data on gpu ', () => {
expect(endDataBuckets).toEqual(startDataBuckets + 1);
});
});

function createStagingGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType) {
const bytesPerElement = 4;
const sizeInBytes = data.length * bytesPerElement;

const gpuWriteBuffer = device.createBuffer({
mappedAtCreation: true,
size: sizeInBytes,
usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
});
const arrayBuffer = gpuWriteBuffer.getMappedRange();
if (dtype === 'float32') {
new Float32Array(arrayBuffer).set(data);
} else if (dtype === 'int32') {
new Int32Array(arrayBuffer).set(data);
} else {
throw new Error(
`Creating tensor from GPUBuffer only supports` +
`'float32'|'int32' dtype, while the dtype is ${dtype}.`);
}
gpuWriteBuffer.unmap();
return gpuWriteBuffer;
}

function createGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType,
bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
GPUBufferUsage.COPY_SRC) {
const bytesPerElement = 4;
const sizeInBytes = data.length * bytesPerElement;

const gpuWriteBuffer = createStagingGPUBufferFromData(device, data, dtype);
const gpuReadBuffer = device.createBuffer(
{mappedAtCreation: false, size: sizeInBytes, usage: bufferUsage});

const copyEncoder = device.createCommandEncoder();
copyEncoder.copyBufferToBuffer(
gpuWriteBuffer, 0, gpuReadBuffer, 0, sizeInBytes);
const copyCommands = copyEncoder.finish();
device.queue.submit([copyCommands]);
gpuWriteBuffer.destroy();
return gpuReadBuffer;
}

async function testCreateTensorFromGPUBuffer(
dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const shape: number[] = [aData.length];
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = useDefaultShapeAndType ? tf.tensor(webGPUData) :
tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
const b = tf.tensor(bData, shape, dtype);
const result = tf.add(a, b);
tf.test_util.expectArraysClose(await result.data(), expected);
a.dispose();
b.dispose();
result.dispose();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
expect(endNumBytes - startNumBytes).toEqual(0);
expect(endNumTensors - startNumTensors).toEqual(0);
if (zeroCopy === true) {
aBuffer.destroy();
}
}

function createTensorFromGPUTest(zeroCopy = false) {
it('use default shape and data type(float32)', async () => {
await testCreateTensorFromGPUBuffer('float32', true, zeroCopy);
});

it('work for float32', async () => {
await testCreateTensorFromGPUBuffer('float32', false, zeroCopy);
});

it('work for int32', async () => {
await testCreateTensorFromGPUBuffer('int32', false, zeroCopy);
});

it('work for read', async () => {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const shape: number[] = [aData.length];
const a = tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
await a.data();
if (zeroCopy === true) {
aBuffer.destroy();
}
});

it('two tensors share the same GPUBuffer', async () => {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const shape: number[] = [aData.length];
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = tf.tensor(webGPUData, shape, dtype);
const b = tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
const result = tf.add(a, b);
const expected =
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32];
tf.test_util.expectArraysClose(await result.data(), expected);
a.dispose();
b.dispose();
result.dispose();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
expect(endNumBytes - startNumBytes).toEqual(0);
expect(endNumTensors - startNumTensors).toEqual(0);
if (zeroCopy === true) {
aBuffer.destroy();
}
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also add tests for the cases where buffer.size is larger than the tensor size and where buffer.size is smaller than the tensor size.


it('GPUBuffer size is bigger than tensor size', async () => {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
// GPUBuffer.size is bigger than shape size
const shape: number[] = [aData.length - 1];
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = tf.tensor(webGPUData, shape, dtype);
const b = tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
const result = tf.add(a, b);
const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
tf.test_util.expectArraysClose(await result.data(), expected);
a.dispose();
b.dispose();
result.dispose();
const endNumBytes = tf.memory().numBytes;
const endNumTensors = tf.memory().numTensors;
expect(endNumBytes - startNumBytes).toEqual(0);
expect(endNumTensors - startNumTensors).toEqual(0);
if (zeroCopy === true) {
aBuffer.destroy();
}
});

it('throw when GPUBuffer size is smaller than tensor size', async () => {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createGPUBufferFromData(device, aData, dtype);
// Throw when GPUBuffer.size is smaller than shape size
const shape: number[] = [aData.length + 1];
const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
expect(a).toThrowError();
aBuffer.destroy();
});

it('throw when GPUBuffer usage is not correct', async () => {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
// Create a GPUBuffer without GPUBufferUsage.STORAGE.
const aBuffer = createStagingGPUBufferFromData(device, aData, dtype);
// Throw when GPUBuffer usage is not correct.
const shape: number[] = [aData.length];
const a = () => tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
expect(a).toThrowError();
aBuffer.destroy();
});
}

describeWebGPU('create tensor from GPUBuffer', () => {
createTensorFromGPUTest();
});

describeWebGPU('create tensor from GPUBuffer with zero copy', () => {
createTensorFromGPUTest(true);
});
3 changes: 1 addition & 2 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ ENV.registerFlag('WEBGPU_USE_NAIVE_CONV2D_DEBUG', () => false);
* are dispatched, it means the hardware may be in low occupancy.
* 0 means it's not set by the user. A default strategy will be applied.
*/
ENV.registerFlag(
'WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);

/**
* Whether we will run im2col as a separate shader for convolution.
Expand Down
10 changes: 6 additions & 4 deletions tfjs-core/src/backends/backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import {Backend, DataToGPUOptions, GPUData, Tensor} from '../tensor';
import {DataId} from '../tensor_info';
import {BackendValues, DataType, WebGLData} from '../types';
import {BackendValues, DataType, WebGLData, WebGPUData} from '../types';

export const EPSILON_FLOAT32 = 1e-7;
export const EPSILON_FLOAT16 = 1e-4;
Expand Down Expand Up @@ -133,10 +133,12 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer {
refCount: number): void {
return notYetImplemented('move');
}
createTensorFromTexture(values: WebGLData, shape: number[], dtype: DataType):
Tensor {
return notYetImplemented('createTensorFromTexture');

createTensorFromGPUData(
values: WebGLData|WebGPUData, shape: number[], dtype: DataType): Tensor {
return notYetImplemented('createTensorFromGPUData');
}

memory(): {unreliable: boolean; reasons?: string[]} {
return notYetImplemented('memory');
}
Expand Down
2 changes: 1 addition & 1 deletion tfjs-core/src/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export {RMSPropOptimizer} from './optimizers/rmsprop_optimizer';
export {SGDOptimizer} from './optimizers/sgd_optimizer';
export {DataToGPUOptions, DataToGPUWebGLOption, GPUData, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, Variable} from './tensor';
export {GradSaveFunc, NamedTensorMap, TensorContainer, TensorContainerArray, TensorContainerObject} from './tensor_types';
export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData} from './types';
export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData, WebGPUData} from './types';

export * from './ops/ops';
export {Reduction} from './ops/loss_ops_utils';
Expand Down
Loading