From 0c5826f013023c477e7342c2de0cf61c2c9b7709 Mon Sep 17 00:00:00 2001 From: wejoncy Date: Tue, 26 Dec 2023 16:43:33 +0800 Subject: [PATCH 1/8] attn_mask fix, set as torch.long (#18931) ### Description ### Motivation and Context --- onnxruntime/python/tools/transformers/large_model_exporter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py index 407c3b80e153f..1601b1a203b9a 100644 --- a/onnxruntime/python/tools/transformers/large_model_exporter.py +++ b/onnxruntime/python/tools/transformers/large_model_exporter.py @@ -252,7 +252,8 @@ def fetch_onnx_inputs_outputs_name( elif name == "attention_mask": attn_mask = onnx_inputs[idx] onnx_inputs[idx] = torch.cat( - (attn_mask, torch.ones((attn_mask.shape[0], 1), device=attn_mask.device)), dim=1 + (attn_mask, torch.ones((attn_mask.shape[0], 1), device=attn_mask.device, dtype=attn_mask.dtype)), + dim=1, ) elif name == "input_ids": input_ids = onnx_inputs[idx] From dbb8680bdc8badd0616cc7fac9d9f82231188be3 Mon Sep 17 00:00:00 2001 From: Sheil Kumar Date: Tue, 26 Dec 2023 12:33:42 -0800 Subject: [PATCH 2/8] Delay load dxcore.dll in addition to ext-ms-win-dxcore-l1-1-0.dll (#18913) Delay load dxcore.dll in addition to ext-ms-win-dxcore-l1-1-0.dll Co-authored-by: Sheil Kumar --- cmake/onnxruntime_providers_dml.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_dml.cmake b/cmake/onnxruntime_providers_dml.cmake index 01b0bda9fea6b..439be882dcc5e 100644 --- a/cmake/onnxruntime_providers_dml.cmake +++ b/cmake/onnxruntime_providers_dml.cmake @@ -62,7 +62,7 @@ target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib) if (NOT GDK_PLATFORM) - set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199") + set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:dxcore.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199") endif() target_compile_definitions(onnxruntime_providers_dml From 0bc71b0c9b817b0e54d9b6f1a7f92eaa1b77ea66 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Wed, 27 Dec 2023 09:23:52 +0800 Subject: [PATCH 3/8] [js/webgpu] Refactor attributes of pool (#18728) --- js/web/lib/wasm/jsep/webgpu/ops/pool.ts | 61 +++++++++++++------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/js/web/lib/wasm/jsep/webgpu/ops/pool.ts b/js/web/lib/wasm/jsep/webgpu/ops/pool.ts index 84d04efc37f28..9e9b361c1af1c 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/pool.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/pool.ts @@ -5,7 +5,7 @@ import {env} from 'onnxruntime-common'; import {TensorView} from '../../tensor-view'; import {PoolConvUtil, ShapeUtil} from '../../util'; -import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; +import {AttributeWithCacheKey} from '../attribute-with-cache-key'; import {ComputeContext, ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../types'; import {createTensorShapeVariables, getElementAt, IndicesHelper, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from './common'; @@ -63,7 +63,7 @@ const getUniformAndPadInfo = 2 is not supported for NHWC format.'); @@ -110,8 +110,8 @@ const getUniformAndPadInfo = ( shaderHelper: ShaderHelper, x: IndicesHelper, rank: number, outputShapeRank: number, attributes: AttributeType, - op1: string, op2: string, start: number, uniforms: UniformsArrayType, hasPads: boolean, pwStartEnd: boolean, - phStartEnd: boolean): string => { + op1: string, op2: string, start: number, uniforms: UniformsArrayType, hasPads: boolean, pwStartEndNotZero: boolean, + phStartEndNotZero: boolean): string => { const isChannelsLast = attributes.format === 'NHWC'; const dataType = x.type.value; const output = outputVariable('output', x.type.tensor, outputShapeRank); @@ -121,7 +121,7 @@ const generatePoolingCode = + (`${attributes.format};${attributes.ceilMode};${attributes.autoPad};${attributes.kernelShape.length}`); + +const createAveragePoolShaderKeyFromAttributes = (attributes: AveragePoolAttributes): string => + (`${createShaderKeyFromAttributes(attributes)};${attributes.countIncludePad}`); + +const createMaxPoolShaderKeyFromAttributes = (attributes: MaxPoolAttributes): string => + (`${createShaderKeyFromAttributes(attributes)};${attributes.storageOrder};${attributes.dilations}`); + const parsePoolCommonAttributes = (attributes: Record): PoolCommonAttributes => ({ format: attributes.format as FormatAttributes['format'], autoPad: ['NOTSET', 'VALID', 'SAME_UPPER', 'SAME_LOWER'][attributes.auto_pad as number], @@ -285,17 +294,14 @@ const createAveragePoolProgramInfo = } else { op2 += `value /= ${dataType}(i32(uniforms.kernelSize) - pad);`; } - const [programUniforms, uniforms, hasPads, pwStartEnd, phStartEnd] = + const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] = getUniformAndPadInfo(outputShape, adjustedAttributes); - programUniforms.push(...createTensorShapeVariables(input.dims)); - programUniforms.push(...createTensorShapeVariables(outputShape)); + programUniforms.push(...createTensorShapeVariables(input.dims), ...createTensorShapeVariables(outputShape)); const inputDependencies: ProgramInputTensorInfoDependency[] = ['rank']; return { name, - shaderCache: { - hint: attributes.cacheKey + hasPads + pwStartEnd + phStartEnd + adjustedAttributes.countIncludePad, - inputDependencies - }, + shaderCache: + {hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies}, getRunData: () => ({ outputs: [{dims: outputShape, dataType: input.dataType}], dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}, @@ -303,7 +309,7 @@ const createAveragePoolProgramInfo = }), getShaderSource: shaderHelper => generatePoolingCode( shaderHelper, x, input.dims.length, outputShape.length, adjustedAttributes, op1, op2, 0.0, uniforms, - hasPads, pwStartEnd, phStartEnd), + hasPads, pwStartEndNotZero, phStartEndNotZero), }; }; @@ -315,8 +321,8 @@ export const parseAveragePoolAttributes = (attributes: Record): if (attr.ceilMode !== 0) { throw new Error('using ceil() in shape computation is not yet supported for AveragePool'); } - - return createAttributeWithCacheKey({countIncludePad, ...attr}); + const averagePoolAttributes = {countIncludePad, ...attr, cacheKey: ''}; + return {...averagePoolAttributes, cacheKey: createAveragePoolShaderKeyFromAttributes(averagePoolAttributes)}; }; export const averagePool = (context: ComputeContext, attributes: AveragePoolAttributes): void => { @@ -332,8 +338,7 @@ const globalPoolAttributes = { strides: [], pads: [], storageOrder: 0, - dilations: [], - cacheKey: '' + dilations: [] }; export const parseGlobalAveragePoolAttributes = (attributes: Record): AveragePoolAttributes => { @@ -361,13 +366,13 @@ const createMaxPoolProgramInfo = const op2 = ''; const x = inputVariable('x', input.dataType, input.dims.length); const inputDependencies: ProgramInputTensorInfoDependency[] = ['rank']; - const [programUniforms, uniforms, hasPads, pwStartEnd, phStartEnd] = + const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] = getUniformAndPadInfo(outputShape, adjustedAttributes); - programUniforms.push(...createTensorShapeVariables(input.dims)); - programUniforms.push(...createTensorShapeVariables(outputShape)); + programUniforms.push(...createTensorShapeVariables(input.dims), ...createTensorShapeVariables(outputShape)); return { name, - shaderCache: {hint: attributes.cacheKey + hasPads, inputDependencies}, + shaderCache: + {hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies}, getRunData: () => ({ outputs: [{dims: outputShape, dataType: input.dataType}], dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}, @@ -375,7 +380,7 @@ const createMaxPoolProgramInfo = }), getShaderSource: shaderHelper => generatePoolingCode( shaderHelper, x, input.dims.length, outputShape.length, adjustedAttributes, op1, op2, -1e5, uniforms, - hasPads, pwStartEnd, phStartEnd), + hasPads, pwStartEndNotZero, phStartEndNotZero), }; }; @@ -396,8 +401,8 @@ export const parseMaxPoolAttributes = (attributes: Record): Max if (attr.ceilMode !== 0) { throw new Error('using ceil() in shape computation is not yet supported for MaxPool'); } - - return createAttributeWithCacheKey({storageOrder, dilations, ...attr}); + const maxPoolAttributes = {storageOrder, dilations, ...attr, cacheKey: ''}; + return {...maxPoolAttributes, cacheKey: createMaxPoolShaderKeyFromAttributes(maxPoolAttributes)}; }; export const parseGlobalMaxPoolAttributes = (attributes: Record): MaxPoolAttributes => { From 437d339d66d376a5eee05ee09e84f061afab2d2a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Dec 2023 10:34:25 -0800 Subject: [PATCH 4/8] Bump actions/setup-python from 4 to 5 (#18921) --- .github/workflows/lint.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 432c789e943b5..818d63d6e957e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -36,7 +36,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: # Version range or exact version of Python to use, using SemVer's version range syntax. Reads from .python-version if unset. python-version: "3.10" diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7b314d845d9b4..6e81a06947150 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.8.x' architecture: 'x64' diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 14b93c4f4953e..7a6038ff2c2a8 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: false - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.11.x' architecture: 'x64' From e08bdc109d6a6cda6f8628be5cf8f68367ac1970 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Dec 2023 10:34:41 -0800 Subject: [PATCH 5/8] Bump github/codeql-action from 2 to 3 (#18922) --- .github/workflows/codeql.yml | 6 +++--- .github/workflows/lint.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d3ecf44fe5733..4a5b87b3e69ed 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -37,7 +37,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # If this step fails, then you should remove it and run the build manually (see below) - if: ${{ matrix.language != 'cpp' }} name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 818d63d6e957e..12b772ceff282 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,7 +65,7 @@ jobs: - name: Upload SARIF file if: always() continue-on-error: true - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: # Path to SARIF file relative to the root of the repository sarif_file: lintrunner.sarif From a90526667d720d46119fbf0faa6d190c633a1c50 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Dec 2023 10:34:53 -0800 Subject: [PATCH 6/8] Bump actions/setup-dotnet from 3 to 4 (#18919) --- .github/workflows/publish-csharp-apidocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish-csharp-apidocs.yml b/.github/workflows/publish-csharp-apidocs.yml index 9b9ca924bd008..0754bd58c02a9 100644 --- a/.github/workflows/publish-csharp-apidocs.yml +++ b/.github/workflows/publish-csharp-apidocs.yml @@ -26,7 +26,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup .NET - uses: actions/setup-dotnet@v3 + uses: actions/setup-dotnet@v4 with: dotnet-version: 6.0.x - name: Restore dependencies From 31d4a21c4bff39946f3884ab3fec90b1d4d5224e Mon Sep 17 00:00:00 2001 From: Guenther Schmuelling Date: Wed, 27 Dec 2023 15:22:05 -0800 Subject: [PATCH 7/8] [js/webgpu] fix heap access > 2GB (#18914) --- js/web/lib/wasm/jsep/init.ts | 2 +- onnxruntime/core/providers/js/js_kernel.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts index cad1e87b24a51..3c6edf3ebb35d 100644 --- a/js/web/lib/wasm/jsep/init.ts +++ b/js/web/lib/wasm/jsep/init.ts @@ -69,7 +69,7 @@ class ComputeContextImpl implements ComputeContext { const heapU32 = module.HEAPU32; // extract context data - let dataIndex = (contextDataOffset >> 2); + let dataIndex = (contextDataOffset >>> 2); this.opKernelContext = heapU32[dataIndex++]; const inputCount = heapU32[dataIndex++]; this.outputCount = heapU32[dataIndex++]; diff --git a/onnxruntime/core/providers/js/js_kernel.h b/onnxruntime/core/providers/js/js_kernel.h index fdd5c7dee5bfc..5c2d1f0b881ba 100644 --- a/onnxruntime/core/providers/js/js_kernel.h +++ b/onnxruntime/core/providers/js/js_kernel.h @@ -197,7 +197,7 @@ class JsKernel : public OpKernel { int status_code = EM_ASM_INT( { return Module.jsepRunKernel($0, $1, Module.jsepSessionState.sessionHandle, Module.jsepSessionState.errors); }, - this, reinterpret_cast(p_serialized_kernel_context)); + this, reinterpret_cast(p_serialized_kernel_context)); LOGS_DEFAULT(VERBOSE) << "outputs = " << context->OutputCount() << ". Y.data=" << (size_t)(context->Output(0)->DataRaw()) << "."; From 3bbe4fe2ff27da46944f526481d64b9c964a3066 Mon Sep 17 00:00:00 2001 From: satyajandhyala Date: Wed, 27 Dec 2023 16:21:29 -0800 Subject: [PATCH 8/8] [JS/WebGPU] Add trilinear interpolation to Resize; activation_params attribute is optional for FusedConv also. (#18842) ### Description Add trilinear interpolation to Resize and changed activation_params attribute as optional for FuseConv. ### Motivation and Context --- js/web/lib/wasm/jsep/webgpu/ops/resize.ts | 190 ++++++++++++++---- js/web/test/data/ops/upsample.jsonc | 102 ++++++++++ js/web/test/suite-test-list.jsonc | 1 + .../core/providers/js/operators/conv.h | 12 +- 4 files changed, 254 insertions(+), 51 deletions(-) diff --git a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts index e1369c2c2b43b..d20ef63222155 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts @@ -219,7 +219,7 @@ const initOutputShape = return outputShape; }; -const adjustOutputShape = (inputShape: readonly number[], scales: number[], attributes: ResizeAttributes): number[] => { +const adjustOutputShape = (inputShape: readonly number[], scales: number[], attributes: ResizeAttributes) => { const scaleInPolicy = (() => { switch (attributes.keepAspectRatioPolicy) { case 'not_larger': @@ -312,21 +312,27 @@ const checkInputIndices = (input: IndicesHelper, inputShape: readonly number[]): return true; }`; +const setChannelAndBatchIndices = + (input: IndicesHelper, channelIdx: number, batchIdx: number, spacialDims: number): string => + input.rank > spacialDims ? ` + ${input.indicesSet('input_indices', channelIdx, 'channel')}; + ${input.indicesSet('input_indices', batchIdx, 'batch')}; +` : + ''; + const bilinearInterpolation = - (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], scales: readonly number[], - useExtrapolation: boolean, extrapolationValue: number): string => { + (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], useExtrapolation: boolean, + extrapolationValue: number): string => { + const isNchw = true; const [batchIdx, heightIdx, widthIdx, channelIdx] = - inputShape.length === 2 ? [-1, 0, 1, -1] : (scales[1] === 1.0 ? [0, 2, 3, 1] : [0, 1, 2, 3]); + inputShape.length === 2 ? [-1, 0, 1, -1] : (isNchw ? [0, 2, 3, 1] : [0, 1, 2, 3]); const dType = input.type.value; return ` fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${dType} { var input_indices: ${input.type.indices}; ${input.indicesSet('input_indices', heightIdx, `max(0, min(row, ${inputShape[heightIdx]} - 1))`)}; ${input.indicesSet('input_indices', widthIdx, `max(0, min(col, ${inputShape[widthIdx]} - 1))`)}; - if (${inputShape.length} > 2) { - ${input.indicesSet('input_indices', channelIdx, 'channel')}; - ${input.indicesSet('input_indices', batchIdx, 'batch')}; - }; + ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 2)} return ${input.getByIndices('input_indices')}; } @@ -334,30 +340,36 @@ const bilinearInterpolation = var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); var row:${dType} = originalIndices[${heightIdx}]; var col:${dType} = originalIndices[${widthIdx}]; - if (${useExtrapolation} && (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > ${ - inputShape[widthIdx]} - 1)) { + ${ + useExtrapolation ? + `if (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > (${inputShape[widthIdx]} - 1))) { return ${extrapolationValue}; - } + }` : + ''}; row = max(0, min(row, ${inputShape[heightIdx]} - 1)); col = max(0, min(col, ${inputShape[widthIdx]} - 1)); var row1: u32 = u32(row); var col1: u32 = u32(col); var row2: u32 = u32(row + 1); var col2: u32 = u32(col + 1); - var channel: u32 = 0; - var batch: u32 = 0; - if (${inputShape.length > 2}) { - channel = u32(originalIndices[${channelIdx}]); - batch = u32(originalIndices[${batchIdx}]); - } + var channel: u32 = ${inputShape.length > 2 ? `u32(originalIndices[${channelIdx}])` : '0'}; + var batch: u32 = ${inputShape.length > 2 ? `u32(originalIndices[${batchIdx}])` : '0'}; var x11: ${dType} = getInputValue(batch, channel, row1, col1); var x12: ${dType} = getInputValue(batch, channel, row1, col2); var x21: ${dType} = getInputValue(batch, channel, row2, col1); var x22: ${dType} = getInputValue(batch, channel, row2, col2); - var dx1: ${dType} = row - ${dType}(row1); - var dx2: ${dType} = ${dType}(row2) - row; - var dy1 = col - ${dType}(col1); - var dy2 = ${dType}(col2) - col; + var dx1: ${dType} = abs(row - ${dType}(row1)); + var dx2: ${dType} = abs(${dType}(row2) - row); + var dy1: ${dType} = abs(col - ${dType}(col1)); + var dy2: ${dType} = abs(${dType}(col2) - col); + if (row1 == row2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (col1 == col2) { + dy1 = 0.5; + dy2 = 0.5; + } return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1); }`; }; @@ -366,7 +378,9 @@ const bicubicInterpolation = (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[], scales: readonly number[], roi: readonly number[], cubicCoeffA: number, useExtrapolation: boolean, extrapolationValue: number, excludeOutside: boolean): string => { - const [heightIdx, widthIdx] = inputShape.length === 2 ? [0, 1] : (scales[1] === 1.0) ? [2, 3] : [1, 2]; + const is2D = inputShape.length === 2; + const isNchw = true; + const [heightIdx, widthIdx] = is2D ? [0, 1] : isNchw ? [2, 3] : [1, 2]; const dType = input.type.value; const createCubicInterpolationFunction = (idx: number): string => { const direction = idx === heightIdx ? 'row' : 'col'; @@ -386,16 +400,18 @@ const bicubicInterpolation = for (var i: i32 = -1; i < 3; i++) { var ${direction}: ${dType} = originalIdx + ${dType}(i); if (${direction} < 0 || ${direction} >= ${inputShape[idx]}) { - if (${excludeOutside}) { - coefs[i + 1] = 0.0; - continue; - } else if (${useExtrapolation}) { - return ${extrapolationValue}; - } else { - ${direction} = max(0, min(${direction}, ${inputShape[idx]} - 1)); - } + ${(() => { + if (excludeOutside) { + return `coefs[i + 1] = 0.0; + continue;`; + } else if (useExtrapolation) { + return `return ${extrapolationValue};`; + } else { + return `${direction} = max(0, min(${direction}, ${inputShape[idx]} - 1));`; } - var input_indices_copy: ${input.type.indices} = input_indices; + })()}; + } + var input_indices_copy: ${input.type.indices} = input_indices; ${input.indicesSet('input_indices_copy', idx, `u32(${direction})`)}; data[i + 1] = ${ idx === heightIdx ? input.getByIndices('input_indices_copy') : @@ -435,6 +451,78 @@ const bicubicInterpolation = `; }; +const trilinearInterpolation = + (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], useExtrapolation: boolean, + extrapolationValue: number): string => { + const isNchw = true; + const [batchIdx, depthIdx, heightIdx, widthIdx, channelIdx] = + inputShape.length === 3 ? [-1, 0, 1, 2, -1] : (isNchw ? [0, 2, 3, 4, 1] : [0, 1, 2, 3, 4]); + const dType = input.type.value; + return ` + fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${dType} { + var input_indices: ${input.type.indices}; + ${input.indicesSet('input_indices', depthIdx, `max(0, min(depth, ${inputShape[depthIdx]} - 1))`)}; + ${input.indicesSet('input_indices', heightIdx, `max(0, min(height, ${inputShape[heightIdx]} - 1))`)}; + ${input.indicesSet('input_indices', widthIdx, `max(0, min(width, ${inputShape[widthIdx]} - 1))`)}; + ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 3)} + return ${input.getByIndices('input_indices')}; + } + + fn trilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var depth:${dType} = originalIndices[${depthIdx}]; + var height:${dType} = originalIndices[${heightIdx}]; + var width:${dType} = originalIndices[${widthIdx}]; + ${ + useExtrapolation ? `if (depth < 0 || depth > (${inputShape[depthIdx]} - 1) || height < 0 || height > (${ + inputShape[heightIdx]} - 1) || width < 0 || (width > ${inputShape[widthIdx]} - 1))) { + return ${extrapolationValue}; + }` : + ''}; + + depth = max(0, min(depth, ${inputShape[depthIdx]} - 1)); + height = max(0, min(height, ${inputShape[heightIdx]} - 1)); + width = max(0, min(width, ${inputShape[widthIdx]} - 1)); + var depth1: u32 = u32(depth); + var height1: u32 = u32(height); + var width1: u32 = u32(width); + var depth2: u32 = u32(depth + 1); + var height2: u32 = u32(height + 1); + var width2: u32 = u32(width + 1); + var channel: u32 = ${inputShape.length > 3 ? `u32(originalIndices[${channelIdx}])` : '0'}; + var batch: u32 = ${inputShape.length > 3 ? `u32(originalIndices[${batchIdx}])` : '0'}; + + var x111: ${dType} = getInputValue(batch, channel, depth1, height1, width1); + var x112: ${dType} = getInputValue(batch, channel, depth1, height1, width2); + var x121: ${dType} = getInputValue(batch, channel, depth1, height2, width1); + var x122: ${dType} = getInputValue(batch, channel, depth1, height2, width2); + var x211: ${dType} = getInputValue(batch, channel, depth2, height1, width1); + var x212: ${dType} = getInputValue(batch, channel, depth2, height1, width2); + var x221: ${dType} = getInputValue(batch, channel, depth2, height2, width1); + var x222: ${dType} = getInputValue(batch, channel, depth2, height2, width2); + var dx1: ${dType} = abs(depth - ${dType}(depth1)); + var dx2: ${dType} = abs(${dType}(depth2) - depth); + var dy1: ${dType} = abs(height - ${dType}(height1)); + var dy2: ${dType} = abs(${dType}(height2) - height); + var dz1: ${dType} = abs(width - ${dType}(width1)); + var dz2: ${dType} = abs(${dType}(width2) - width); + if (depth1 == depth2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (height1 == height2) { + dy1 = 0.5; + dy2 = 0.5; + } + if (width1 == width2) { + dz1 = 0.5; + dz2 = 0.5; + } + return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 + + x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1); + }`; + }; + const createResizeProgramInfo = (inputTensor: TensorView, attributes: ResizeAttributes, opsetVersion: number, scalesInput: readonly number[], sizes: readonly number[], roiInput: readonly number[]): ProgramInfo => { @@ -454,6 +542,7 @@ const createResizeProgramInfo = const outputSize = ShapeUtil.size(outputShape); const noScale = inputShape.length === outputShape.length && inputShape.every((d, i) => d === outputShape[i]); const useExtrapolation = attributes.coordinateTransformMode === 'tf_crop_and_resize'; + const extrapolationValue = attributes.extrapolationValue; const dataType = input.type.value; const getShaderSource = (shaderHelper: ShaderHelper) => ` ${noScale ? '' : ` @@ -471,16 +560,28 @@ const createResizeProgramInfo = case 'linear': return ` ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales.length, roi.length)}; - ${ - bilinearInterpolation( - input, output, inputShape, scales, useExtrapolation, attributes.extrapolationValue)}; - `; + ${(() => { + if (inputShape.length === 2 || inputShape.length === 4) { + return `${bilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`; + } else if (inputShape.length === 3 || inputShape.length === 5) { + return `${trilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`; + } else { + throw Error('Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.'); + } + })()}; + `; case 'cubic': return ` - ${ - bicubicInterpolation( - input, output, inputShape, outputShape, scales, roi, attributes.cubicCoeffA, useExtrapolation, - attributes.extrapolationValue, attributes.excludeOutside)}; + ${(() => { + if (inputShape.length === 2 || inputShape.length === 4) { + return `${ + bicubicInterpolation( + input, output, inputShape, outputShape, scales, roi, attributes.cubicCoeffA, useExtrapolation, + attributes.extrapolationValue, attributes.excludeOutside)}`; + } else { + throw Error('Cubic mode only supports input dims 2 and 4 are supported in linear mode.'); + } + })()}; `; default: throw Error('Invalid resize mode'); @@ -507,21 +608,23 @@ const createResizeProgramInfo = output[global_idx] = ${attributes.extrapolationValue}; }`; case 'linear': - return 'output[global_idx] = bilinearInterpolation(output_indices);'; + return `output[global_idx] = ${ + (inputShape.length === 2 || inputShape.length === 4) ? 'bilinearInterpolation' : + 'trilinearInterpolation'}(output_indices);`; case 'cubic': return 'output[global_idx] = bicubicInterpolation(output_indices);'; default: throw Error(`Unsupported resize mode: ${attributes.mode}`); } })()}; - `} +`} }`; return { name: 'Resize', shaderCache: { hint: `${attributes.cacheKey}|${opsetVersion}|${scales.length > 0 ? scales : ''}|${ - sizes.length > 0 ? sizes : ''}|${roi.length > 0 ? roi : ''}|${noScale}`, + sizes.length > 0 ? sizes : ''}|${roi.length > 0 ? roi : ''}|${noScale}|${inputShape}`, inputDependencies: ['rank'] }, getShaderSource, @@ -551,6 +654,9 @@ export const resize = (context: ComputeContext, attributes: ResizeAttributes): v const sizes: number[] = []; const roi: number[] = []; const opsetVersion = getOpsetVersionFromCustomDataBuffer(context); + if (attributes.antialias !== 0) { + throw Error('Only default value (0) for Antialias attribute is supported'); + } validateInputs(context.inputs, attributes, opsetVersion, scales, sizes, roi); context.compute( createResizeProgramInfo(context.inputs[0], attributes, opsetVersion, scales, sizes, roi), {inputs: [0]}); diff --git a/js/web/test/data/ops/upsample.jsonc b/js/web/test/data/ops/upsample.jsonc index 6c11a8fa3f6a5..c0ad8e547ff2e 100644 --- a/js/web/test/data/ops/upsample.jsonc +++ b/js/web/test/data/ops/upsample.jsonc @@ -2,6 +2,7 @@ { "name": "Upsample - Nearest", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [{ "name": "scales", "data": [1.0, 1.0, 2.0, 3.0], "type": "floats" }], "cases": [ { @@ -32,6 +33,7 @@ { "name": "Upsample - Nearest2X", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [{ "name": "scales", "data": [1.0, 1.0, 2.0, 2.0], "type": "floats" }], "cases": [ { @@ -60,6 +62,7 @@ { "name": "Upsample - Nearest222X", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [{ "name": "scales", "data": [2.0, 1.0, 2.0, 2.0], "type": "floats" }], "cases": [ { @@ -92,6 +95,7 @@ { "name": "Upsample - Nearest15X", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [{ "name": "scales", "data": [1.0, 1.0, 2.0, 1.5], "type": "floats" }], "cases": [ { @@ -120,6 +124,7 @@ { "name": "Upsample - Nearest_NoScale", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [ { "name": "scales", "data": [1.0, 1.0, 1.0, 1.0], "type": "floats" }, { "name": "mode", "data": "nearest", "type": "string" } @@ -147,6 +152,7 @@ { "name": "Upsample - 4D Bilinear", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [ { "name": "scales", "data": [1.0, 1.0, 2.0, 4.0], "type": "floats" }, { "name": "mode", "data": "linear", "type": "string" } @@ -180,6 +186,7 @@ { "name": "Upsample - 2D Bilinear", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [ { "name": "scales", "data": [2.0, 4.0], "type": "floats" }, { "name": "mode", "data": "linear", "type": "string" } @@ -210,6 +217,7 @@ { "name": "Upsample - 4D Bilinear ScalesNoOp", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [ { "name": "scales", "data": [1.0, 1.0, 1.0, 1.0], "type": "floats" }, { "name": "mode", "data": "linear", "type": "string" } @@ -237,6 +245,7 @@ { "name": "Upsample - 1D Nearest", "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, "attributes": [ { "name": "scales", "data": [2.0], "type": "floats" }, { "name": "mode", "data": "nearest", "type": "string" } @@ -260,5 +269,98 @@ ] } ] + }, + { + "name": "Upsample - 5D Trilinear", + "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, + "attributes": [ + { "name": "scales", "data": [1.0, 1.0, 1.0, 2.0, 4.0], "type": "floats" }, + { "name": "mode", "data": "linear", "type": "string" } + ], + "cases": [ + { + "name": "X", + "inputs": [ + { + "data": [1.0, 3.0, 3.0, 5.0, 3.0, 5.0, 7.0, 9.0], + "dims": [1, 2, 1, 2, 2], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 1.0, 1.5, 2.0, 2.5, 3.0, 3.0, 3.0, 3.0, 2.0, 2.5, 3.0, 3.5, 4.0, 4.0, 4.0, 4.0, 3.0, 3.5, 4.0, 4.5, 5.0, + 5.0, 5.0, 5.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.0, 5.0, 5.0, + + 3.0, 3.5, 4.0, 4.5, 5.0, 5.0, 5.0, 5.0, 5.0, 5.5, 6.0, 6.5, 7.0, 7.0, 7.0, 7.0, 7.0, 7.5, 8.0, 8.5, 9.0, + 9.0, 9.0, 9.0, 7.0, 7.5, 8.0, 8.5, 9.0, 9.0, 9.0, 9.0 + ], + "dims": [1, 2, 1, 4, 8], + "type": "float32" + } + ] + } + ] + }, + { + "name": "Upsample - 3D Trilinear", + "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, + "attributes": [ + { "name": "scales", "data": [1.0, 2.0, 4.0], "type": "floats" }, + { "name": "mode", "data": "linear", "type": "string" } + ], + "cases": [ + { + "name": "X", + "inputs": [ + { + "data": [1.0, 3.0, 3.0, 5.0], + "dims": [1, 2, 2], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 1.0, 1.5, 2.0, 2.5, 3.0, 3.0, 3.0, 3.0, 2.0, 2.5, 3.0, 3.5, 4.0, 4.0, 4.0, 4.0, 3.0, 3.5, 4.0, 4.5, 5.0, + 5.0, 5.0, 5.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.0, 5.0, 5.0 + ], + "dims": [1, 4, 8], + "type": "float32" + } + ] + } + ] + }, + { + "name": "Upsample - 3D Trilinear ScalesNoOp", + "operator": "Upsample", + "opset": { "domain": "", "version": 7 }, + "attributes": [ + { "name": "scales", "data": [1.0, 1.0, 1.0], "type": "floats" }, + { "name": "mode", "data": "linear", "type": "string" } + ], + "cases": [ + { + "name": "X", + "inputs": [ + { + "data": [1.0, 3.0, 3.0, 5.0, 3.0, 5.0, 7.0, 9.0], + "dims": [2, 2, 2], + "type": "float32" + } + ], + "outputs": [ + { + "data": [1.0, 3.0, 3.0, 5.0, 3.0, 5.0, 7.0, 9.0], + "dims": [2, 2, 2], + "type": "float32" + } + ] + } + ] } ] diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc index a313adef7151b..594ce9feed31e 100644 --- a/js/web/test/suite-test-list.jsonc +++ b/js/web/test/suite-test-list.jsonc @@ -1392,6 +1392,7 @@ "tile.jsonc", "transpose.jsonc", "transpose_int32_uint32.jsonc", + "upsample.jsonc", "where.jsonc" // Turn on this when https://github.com/microsoft/onnxruntime/issues/17405 is fixed. //"where_broadcast.jsonc", diff --git a/onnxruntime/core/providers/js/operators/conv.h b/onnxruntime/core/providers/js/operators/conv.h index 8f438a319f138..5c0fbf93a4004 100644 --- a/onnxruntime/core/providers/js/operators/conv.h +++ b/onnxruntime/core/providers/js/operators/conv.h @@ -3,8 +3,8 @@ #pragma once -#include #include +#include #include "core/providers/js/js_kernel.h" #include "core/providers/cpu/nn/conv_attributes.h" @@ -17,7 +17,6 @@ class ConvBase : public JsKernel { ConvBase(const OpKernelInfo& info, bool is_channels_last, bool is_fused_conv) : JsKernel(info), conv_attrs_(info), w_is_const_(false) { - std::vector activation_params; TensorShapeVector kernel_shape; const size_t pads_vec_size = conv_attrs_.pads.size() == 0 ? 4 : conv_attrs_.pads.size(); std::vector local_pads(pads_vec_size, 0); @@ -28,13 +27,8 @@ class ConvBase : public JsKernel { if (conv_attrs_.kernel_shape_specified) { ORT_ENFORCE(info.GetAttrs("kernel_shape", kernel_shape).IsOK()); } - if (is_fused_conv) { - ORT_THROW_IF_ERROR(info.GetAttr("activation", &conv_attrs_.activation)); - ORT_THROW_IF_ERROR(info.GetAttrs("activation_params", activation_params)); - } else { - conv_attrs_.activation = info.GetAttrOrDefault("activation", ""); - activation_params = info.GetAttrsOrDefault("activation_params", activation_params); - } + conv_attrs_.activation = info.GetAttrOrDefault("activation", ""); + std::vector activation_params = info.GetAttrsOrDefault("activation_params"); const auto* activation_params_ptr = activation_params.size() > 0 ? activation_params.data() : nullptr; int64_t channels_last = is_channels_last ? 1 : info.GetAttrOrDefault("channels_last", 0); auto kernel_shape_0 = conv_attrs_.kernel_shape_specified && kernel_shape.size() > 0 ? kernel_shape[0] : 0;