From 4ed1bfbb027cfbbe64d962b4f7329d1d0787fe40 Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Tue, 28 Jun 2022 14:23:08 -0700 Subject: [PATCH] naive conv --- .../backends/webgpu/op-resolve-rules.ts | 4 +- .../lib/onnxjs/backends/webgpu/ops/common.ts | 3 +- .../backends/webgpu/ops/conv-grouped.ts | 127 +++++++++++++++ js/web/lib/onnxjs/backends/webgpu/ops/conv.ts | 150 ++++++++++++++++++ js/web/test/suite-test-list.jsonc | 8 +- 5 files changed, 285 insertions(+), 7 deletions(-) create mode 100644 js/web/lib/onnxjs/backends/webgpu/ops/conv-grouped.ts create mode 100644 js/web/lib/onnxjs/backends/webgpu/ops/conv.ts diff --git a/js/web/lib/onnxjs/backends/webgpu/op-resolve-rules.ts b/js/web/lib/onnxjs/backends/webgpu/op-resolve-rules.ts index c5b59565a68da..2ffd0b7ab0fbf 100644 --- a/js/web/lib/onnxjs/backends/webgpu/op-resolve-rules.ts +++ b/js/web/lib/onnxjs/backends/webgpu/op-resolve-rules.ts @@ -5,6 +5,7 @@ import {OpSet} from '../../opset'; import * as binaryOps from './ops/binary-op'; import {concat, parseConcatAttributes} from './ops/concat'; +import {conv, parseConvAttributes} from './ops/conv'; import {gather, parseGatherAttributes} from './ops/gather'; import {gemm, parseGemmAttributesV11, parseGemmAttributesV7} from './ops/gemm'; import {matMul, parseMatMulAttributes} from './ops/matmul'; @@ -23,8 +24,7 @@ export const WEBGPU_OP_RESOLVE_RULES: readonly OpSet.ResolveRule[] = [ // ['Cast', '', '6+', cast, parseCastAttributes], ['Ceil', '', '6+', unaryOps.ceil], ['Clip', '', '6-10', unaryOps.clip, unaryOps.parseClipAttributes], ['Clip', '', '11+', unaryOps.clipV11], ['Concat', '', '4+', concat, parseConcatAttributes], - // ['Conv', '', '1+', conv, parseConvAttributes], - ['Cos', '', '7+', unaryOps.cos], ['Div', '', '7+', binaryOps.div], + ['Conv', '', '1+', conv, parseConvAttributes], ['Cos', '', '7+', unaryOps.cos], ['Div', '', '7+', binaryOps.div], // ['Dropout', '', '7+', unaryOps.identity], // ['DepthToSpace', '', '1+', depthToSpace, parseDepthToSpaceAttributes], // ['Equal', '', '7+', binaryOps.equal], diff --git a/js/web/lib/onnxjs/backends/webgpu/ops/common.ts b/js/web/lib/onnxjs/backends/webgpu/ops/common.ts index 35c79f705b437..0fef5c059cf27 100644 --- a/js/web/lib/onnxjs/backends/webgpu/ops/common.ts +++ b/js/web/lib/onnxjs/backends/webgpu/ops/common.ts @@ -81,7 +81,8 @@ export const createIndicesHelper = (name: string, shape: readonly number[]) => { const i2oExpression = (varIndices: string, isPtr?: boolean) => shape.length < 2 ? `(${isPtr ? '*' : ''}${varIndices})` : `ih_i2o_${name}(${isPtr ? '' : '&'}${varIndices})`; - const indicesVariableDeclaration = (v: string) => `var ${v}:${iType};`; + const indicesVariableDeclaration = (v: string, init?: string[]) => + `var ${v}:${iType}${init ? `=${iType}(${init.join(',')})` : ''};`; return {o2iImpl, o2iCall, i2oImpl, i2oExpression, indicesVariableDeclaration, iType}; }; diff --git a/js/web/lib/onnxjs/backends/webgpu/ops/conv-grouped.ts b/js/web/lib/onnxjs/backends/webgpu/ops/conv-grouped.ts new file mode 100644 index 0000000000000..6dc13ff600270 --- /dev/null +++ b/js/web/lib/onnxjs/backends/webgpu/ops/conv-grouped.ts @@ -0,0 +1,127 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import {Logger} from '../../../instrument'; +import {Tensor} from '../../../tensor'; +import {ShapeUtil} from '../../../util'; +import {WebGpuInferenceHandler} from '../inference-handler'; +import {GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types'; + +import {createIndicesHelper, WORKGROUP_SIZE} from './common'; +import {calculateOutputShape, ConvAttributes} from './conv'; +import {getActicationSnippet} from './fuse-utils'; + +const createGroupedConvProgramMetadata = (hasBias: boolean, cacheHint: string): ProgramMetadata => ({ + name: 'GroupedConv', + inputTypes: hasBias ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] : + [GpuDataType.default, GpuDataType.default], + cacheHint +}); + +const createGroupedConvProgramInfo = + (inferenceHandler: WebGpuInferenceHandler, inputs: readonly Tensor[], metadata: ProgramMetadata, + attributes: ConvAttributes): ProgramInfo => { + const hasBias = inputs.length > 2; + const processBias = hasBias ? 'value += b[output_channel];' : ''; + const xShape = inputs[0].dims; + const wShape = inputs[1].dims; + const outputChannelsPerGroup = wShape[0] / attributes.group; + + const dataType = 'f32'; // TODO: support other data type + const {activationFunction, applyActivation} = getActicationSnippet(attributes); + const inputStorageBuffersDeclarations = [ + `@group(0) @binding(0) var x : array<${dataType}>;`, + `@group(0) @binding(1) var w : array<${dataType}>;` + ]; + if (hasBias) { + inputStorageBuffersDeclarations.push(`@group(0) @binding(2) var b : array<${dataType}>;`); + } + + Logger.verbose( + 'GroupedConv', + `autpPad:${attributes.autoPad}, dilations:${attributes.dilations}, group:${attributes.group}, kernelShape:${ + attributes.kernelShape}, pads:${attributes.pads}, strides:${attributes.strides}`); + const outputShape = + calculateOutputShape(xShape, wShape, attributes.dilations, attributes.pads, attributes.strides); + const outputSize = ShapeUtil.size(outputShape); + const outputIndicesHelper = createIndicesHelper('output', outputShape); + const xIndicesHelper = createIndicesHelper('x', xShape); + const wIndicesHelper = createIndicesHelper('w', wShape); + + const shaderSource = ` + let WORKGROUP_SIZE: u32 = ${WORKGROUP_SIZE}u; + let strides: vec2 = vec2(${attributes.strides[0]}u, ${attributes.strides[1]}u); + let pads: vec2 = vec2(${attributes.pads[0]}u, ${attributes.pads[1]}u); + + ${inputStorageBuffersDeclarations.join('\n')} + @group(0) @binding(${inputStorageBuffersDeclarations.length}) var output : array<${dataType}>; + + ${activationFunction} + ${outputIndicesHelper.o2iImpl} + ${xIndicesHelper.i2oImpl} + ${wIndicesHelper.i2oImpl} + + @stage(compute) @workgroup_size(WORKGROUP_SIZE) + fn main(@builtin(global_invocation_id) global_id : vec3) { + // Guard against out-of-bounds work group sizes + if (global_id.x >= ${outputSize}u) { + return; + } + + ${outputIndicesHelper.indicesVariableDeclaration('outputIndices')} + ${outputIndicesHelper.o2iCall('global_id.x', 'outputIndices')} + let batch: u32 = outputIndices[0]; + let output_channel: u32 = outputIndices[1]; + let xRCCorner: vec2 = vec2(outputIndices[2], outputIndices[3]) * strides - pads; + let group_id: u32 = output_channel / ${outputChannelsPerGroup}u; + + var value: ${dataType} = ${dataType}(0); + for (var wInChannel: u32 = 0u; wInChannel < ${wShape[1]}u; wInChannel++) { + let input_channel = group_id * ${wShape[1]}u + wInChannel; + for (var wHeight: u32 = 0u; wHeight < ${wShape[2]}u; wHeight++) { + let xHeight = xRCCorner.x + wHeight * ${attributes.dilations[0]}u; + + if (xHeight < 0u || xHeight >= ${xShape[2]}u) { + continue; + } + + for (var wWidth: u32 = 0u; wWidth < ${wShape[3]}u; wWidth++) { + let xWidth = xRCCorner.y + wWidth * ${attributes.dilations[1]}u; + if (xWidth < 0u || xWidth >= ${xShape[3]}u) { + continue; + } + + ${ + xIndicesHelper.indicesVariableDeclaration( + 'xIndices', + [ + 'batch', 'input_channel', 'xHeight', 'xWidth' + ])} + let xVal = x[${xIndicesHelper.i2oExpression('xIndices')}]; + ${ + wIndicesHelper.indicesVariableDeclaration('wIndices', [ + 'output_channel', 'wInChannel', 'wHeight', 'wWidth' + ])} + let wVal = w[${wIndicesHelper.i2oExpression('wIndices')}]; + value += xVal*wVal; + } + } + } + ${processBias} + ${applyActivation} + output[global_id.x] = value; + }`; + return { + ...metadata, + outputs: [{dims: outputShape, type: inputs[0].type, gpuDataType: GpuDataType.default}], + shaderSource, + dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)}) + }; + }; + +export const createGroupedConvProgramInfoLoader = + (inferenceHandler: WebGpuInferenceHandler, inputs: readonly Tensor[], attributes: ConvAttributes): + ProgramInfoLoader => { + const metadata = createGroupedConvProgramMetadata(inputs.length > 2, attributes.cacheKey); + return {...metadata, get: () => createGroupedConvProgramInfo(inferenceHandler, inputs, metadata, attributes)}; + }; diff --git a/js/web/lib/onnxjs/backends/webgpu/ops/conv.ts b/js/web/lib/onnxjs/backends/webgpu/ops/conv.ts new file mode 100644 index 0000000000000..644e9b08c7030 --- /dev/null +++ b/js/web/lib/onnxjs/backends/webgpu/ops/conv.ts @@ -0,0 +1,150 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../../../attribute-with-cache-key'; +import {InferenceHandler} from '../../../backend'; +import {Graph} from '../../../graph'; +import {OperatorAsyncImplementation, OperatorInitialization} from '../../../operators'; +import {Tensor} from '../../../tensor'; +import {PoolConvUtil} from '../../../util'; +import {WebGpuInferenceHandler} from '../inference-handler'; + +import {createGroupedConvProgramInfoLoader} from './conv-grouped'; +// import {createDotProductProgramInfoLoader} from './dot-product'; +import {InternalActivationAttributes, parseInternalActivationAttributes} from './fuse-utils'; + +// import {createIm2ColProgramInfoLoader} from './im2col'; +// import {createMatmulProgramInfoLoader} from './matmul'; + + +export const calculateOutputShape = + (inputShape: readonly number[], kernelShape: readonly number[], dilations: readonly number[], + adjustPads: readonly number[], strides: readonly number[]): number[] => { + const batchSize = inputShape[0]; + const inputSpatialShape = inputShape.slice(2); + const spatialRank = inputSpatialShape.length; + const outChannels = kernelShape[0]; + const kernelSpatialShape = kernelShape.slice(2); + const dilatedKernelShape = kernelSpatialShape.map((v, i) => v + (v - 1) * (dilations[i] - 1)); + const inputSpatialShapeWithPad = inputSpatialShape.map((v, i) => v + adjustPads[i] + adjustPads[i + spatialRank]); + const outputSpatialShape = + inputSpatialShapeWithPad.map((v, i) => Math.floor((v - dilatedKernelShape[i] + strides[i]) / strides[i])); + const outputShape = [batchSize, outChannels].concat(...outputSpatialShape); + return outputShape; + }; + +export interface ConvAttributes extends InternalActivationAttributes, AttributeWithCacheKey { + readonly autoPad: string; + readonly dilations: readonly number[]; + readonly group: number; + readonly kernelShape: readonly number[]; + readonly pads: readonly number[]; + readonly strides: readonly number[]; +} + +export const conv: OperatorAsyncImplementation = + async(inferenceHandler: InferenceHandler, inputs: Tensor[], attributes: ConvAttributes): Promise => { + validateInputs(inputs, attributes); // currently will fail if not conv2D + return conv2d(inferenceHandler, inputs, attributes); +}; + +const conv2d: OperatorAsyncImplementation = async( + inferenceHandler: WebGpuInferenceHandler, inputs: Tensor[], attributes: ConvAttributes): Promise => { + const adjustedAttributes = getAdjustedConvAttributes(attributes, inputs); + // const isPointwise = adjustedAttributes.kernelShape[0] === 1 && adjustedAttributes.kernelShape[1] === 1; + // if (adjustedAttributes.group > 1) { + return inferenceHandler.run(createGroupedConvProgramInfoLoader(inferenceHandler, inputs, adjustedAttributes), inputs); + // } else if (isPointwise) { + // return conv2DPointwise(inferenceHandler, inputs, adjustedAttributes); + // } else { + // return conv2D(inferenceHandler, inputs, adjustedAttributes); + // } +}; + +const getAdjustedConvAttributes = (attributes: T, inputs: Tensor[]): T => { + const kernelShape = attributes.kernelShape.slice(); + // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims + if (attributes.kernelShape.length === 0) { + for (let i = 2; i < inputs[1].dims.length; ++i) { + kernelShape.push(inputs[1].dims[i]); + } + } + const pads = attributes.pads.slice(); + PoolConvUtil.adjustPadsBasedOnAutoPad( + inputs[0].dims, attributes.strides, attributes.dilations, kernelShape, pads, attributes.autoPad); + + // always return a new object so does not modify the original attributes + const newAttributes: T = Object.assign({}, attributes); + Object.assign(newAttributes, {kernelShape, pads, cacheKey: attributes.cacheKey}); + return newAttributes; +}; + +export const parseConvAttributes: OperatorInitialization = (node: Graph.Node): ConvAttributes => { + const attributes = node.attributes; + const activationAttributes = parseInternalActivationAttributes(attributes); + // TODO : Make this generic enough to compute default attributes for multi-dimensional conv + const autoPad = attributes.getString('auto_pad', 'NOTSET'); + const dilations = attributes.getInts('dilations', [1, 1]); + const group = attributes.getInt('group', 1); + const kernelShape = attributes.getInts('kernel_shape', []); + const pads = attributes.getInts('pads', [0, 0, 0, 0]); + const strides = attributes.getInts('strides', [1, 1]); + + return createAttributeWithCacheKey({autoPad, dilations, group, kernelShape, pads, strides, ...activationAttributes}); +}; + +const validateInputs = (inputs: Tensor[], attributes: ConvAttributes): void => { + // Refer to the below link for all input checks + // https://github.com/onnx/onnx/blob/master/docs/Operators.md#Conv + if (!inputs || (inputs.length !== 2 && inputs.length !== 3)) { + throw new Error('Conv requires 2 or 3 inputs'); + } + + // TODO : Need to add support for multi-dimensional conv + if (inputs[0].dims.length !== 4 || inputs[1].dims.length !== 4) { + throw new Error('currently only support 2-dimensional conv'); + } + + // FILTER_IN_CHANNEL should be equal to DATA_CHANNEL + const dataChannel = inputs[0].dims[1]; + const filterInChannel = inputs[1].dims[1] * attributes.group; + if (dataChannel !== filterInChannel) { + throw new Error('FILTER_IN_CHANNEL should be equal to DATA_CHANNEL'); + } + + // if bias is provided it should be 1D and the number of elements should be equal to the number of feature maps + if (inputs.length === 3 && (inputs[2].dims.length !== 1 || inputs[1].dims[0] !== inputs[2].dims[0])) { + throw new Error('invalid bias'); + } + + const spatialRank = inputs[0].dims.length - 2; + // wrong dilations dimension + if (attributes.dilations.length !== spatialRank) { + throw new Error(`dilations should be ${spatialRank}D`); + } + + // Wrong strides dimension + if (attributes.strides.length !== spatialRank) { + throw new Error(`strides should be ${spatialRank}D`); + } + + // Wrong pads dimension + if (attributes.pads.length !== spatialRank * 2) { + throw new Error(`pads should be ${spatialRank * 2}D`); + } + + // if kernelShape is specified, it's data length must be 2 less than dims length of the weights tensor + // (the first 2 dims are batch_size and channels) + if (attributes.kernelShape.length !== 0 && attributes.kernelShape.length !== inputs[1].dims.length - 2) { + throw new Error('invalid kernel shape'); + } + + // TODO : Need to add support for float64 + if (inputs[0].type !== 'float32' || inputs[1].type !== 'float32') { + throw new Error('Conv input(X,W) should be float tensor'); + } + + if (inputs.length === 3 && inputs[2].type !== 'float32') { + throw new Error('Conv input(bias) should be float tensor'); + } +}; diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc index 001ce4fccc2f9..b9c4d741748ca 100644 --- a/js/web/test/suite-test-list.jsonc +++ b/js/web/test/suite-test-list.jsonc @@ -332,9 +332,9 @@ "test_concat_3d_axis_0", "test_concat_3d_axis_1", "test_concat_3d_axis_2", - // "test_conv_with_strides_and_asymmetric_padding", - // "test_conv_with_strides_no_padding", - // "test_conv_with_strides_padding", + "test_conv_with_strides_and_asymmetric_padding", + "test_conv_with_strides_no_padding", + "test_conv_with_strides_padding", "test_constant", "test_cos_example", "test_cos", @@ -515,7 +515,7 @@ "asin.jsonc", "ceil.jsonc", "concat.jsonc", - //"conv.jsonc", + "conv.jsonc", "cos.jsonc", "div.jsonc", //"depth-to-space.jsonc",