
Merge branch 'main' of https://github.com/microsoft/onnxruntime into pengwa/mem_improvement
pengwa committed Dec 28, 2023
2 parents 28f7c9e + 3bbe4fe commit b5b7e69
Showing 14 changed files with 300 additions and 91 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/codeql.yml
@@ -37,7 +37,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@v2
+ uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
# If this step fails, then you should remove it and run the build manually (see below)
- if: ${{ matrix.language != 'cpp' }}
name: Autobuild
- uses: github/codeql-action/autobuild@v2
+ uses: github/codeql-action/autobuild@v3

- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v2
+ uses: github/codeql-action/analyze@v3
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
@@ -36,7 +36,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Setup Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
# Version range or exact version of Python to use, using SemVer's version range syntax. Reads from .python-version if unset.
python-version: "3.10"
@@ -65,7 +65,7 @@ jobs:
- name: Upload SARIF file
if: always()
continue-on-error: true
- uses: github/codeql-action/upload-sarif@v2
+ uses: github/codeql-action/upload-sarif@v3
with:
# Path to SARIF file relative to the root of the repository
sarif_file: lintrunner.sarif
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
@@ -17,7 +17,7 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-python@v4
+ - uses: actions/setup-python@v5
with:
python-version: '3.8.x'
architecture: 'x64'
2 changes: 1 addition & 1 deletion .github/workflows/publish-csharp-apidocs.yml
@@ -26,7 +26,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Setup .NET
- uses: actions/setup-dotnet@v3
+ uses: actions/setup-dotnet@v4
with:
dotnet-version: 6.0.x
- name: Restore dependencies
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
@@ -21,7 +21,7 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: false
- - uses: actions/setup-python@v4
+ - uses: actions/setup-python@v5
with:
python-version: '3.11.x'
architecture: 'x64'
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_dml.cmake
@@ -62,7 +62,7 @@
target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib)

if (NOT GDK_PLATFORM)
- set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199")
+ set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:dxcore.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199")
endif()

target_compile_definitions(onnxruntime_providers_dml
2 changes: 1 addition & 1 deletion js/web/lib/wasm/jsep/init.ts
@@ -69,7 +69,7 @@ class ComputeContextImpl implements ComputeContext {
const heapU32 = module.HEAPU32;

// extract context data
- let dataIndex = (contextDataOffset >> 2);
+ let dataIndex = (contextDataOffset >>> 2);
this.opKernelContext = heapU32[dataIndex++];
const inputCount = heapU32[dataIndex++];
this.outputCount = heapU32[dataIndex++];
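Note on the `>>` to `>>>` change above: `>>` first coerces the byte offset to a signed 32-bit integer, so an offset past 2 GiB would produce a negative `HEAPU32` index, while `>>>` keeps the full unsigned 32-bit range. A minimal standalone sketch (hypothetical offset value, not repository code):

```typescript
// Hypothetical byte offset above 2 GiB, used only to illustrate the operator difference.
const contextDataOffset = 0x80000004;

// Signed shift: the value is reinterpreted as a negative 32-bit integer first.
const signedIndex = contextDataOffset >> 2;    // -536870911, an invalid heap index

// Unsigned shift: the full 32-bit range is preserved.
const unsignedIndex = contextDataOffset >>> 2; // 536870913, the intended HEAPU32 index

console.log(signedIndex, unsignedIndex);
```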
61 changes: 33 additions & 28 deletions js/web/lib/wasm/jsep/webgpu/ops/pool.ts
@@ -5,7 +5,7 @@ import {env} from 'onnxruntime-common';

import {TensorView} from '../../tensor-view';
import {PoolConvUtil, ShapeUtil} from '../../util';
- import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
+ import {AttributeWithCacheKey} from '../attribute-with-cache-key';
import {ComputeContext, ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../types';

import {createTensorShapeVariables, getElementAt, IndicesHelper, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from './common';
@@ -63,7 +63,7 @@ const getUniformAndPadInfo = <AttributeType extends AveragePoolAttributes|MaxPoo
const sw = attributes.strides[attributes.strides.length - 1];
const pwStart = attributes.pads[attributes.pads.length / 2 - 1];
const pwEnd = attributes.pads[attributes.pads.length - 1];
- const pwStartEnd = !!(pwStart + pwEnd);
+ const pwStartEndNotZero = !!(pwStart + pwEnd);
programUniforms.push(
{type: 'uint32', data: kw},
{type: 'uint32', data: sw},
@@ -74,13 +74,13 @@ const getUniformAndPadInfo = <AttributeType extends AveragePoolAttributes|MaxPoo
{name: 'kw', type: 'u32'}, {name: 'sw', type: 'u32'}, {name: 'pwStart', type: 'u32'},
{name: 'pwEnd', type: 'u32'});

- let phStartEnd = false;
+ let phStartEndNotZero = false;
if (attributes.kernelShape.length === 2) {
const kh = attributes.kernelShape[attributes.kernelShape.length - 2];
const sh = attributes.strides[attributes.strides.length - 2];
const phStart = attributes.pads[attributes.pads.length / 2 - 2];
const phEnd = attributes.pads[attributes.pads.length - 2];
- phStartEnd = !!(phStart + phEnd);
+ phStartEndNotZero = !!(phStart + phEnd);
programUniforms.push(
{type: 'uint32', data: kh}, {type: 'uint32', data: sh}, {type: 'uint32', data: phStart},
{type: 'uint32', data: phEnd});
@@ -89,7 +89,7 @@ const getUniformAndPadInfo = <AttributeType extends AveragePoolAttributes|MaxPoo
{name: 'kh', type: 'u32'}, {name: 'sh', type: 'u32'}, {name: 'phStart', type: 'u32'},
{name: 'phEnd', type: 'u32'});
}
- return [programUniforms, uniforms, true, pwStartEnd, phStartEnd];
+ return [programUniforms, uniforms, true, pwStartEndNotZero, phStartEndNotZero];
} else {
if (isChannelsLast) {
throw new Error('Pooling with kernelShape.length > 2 is not supported for NHWC format.');
@@ -110,8 +110,8 @@ const getUniformAndPadInfo = <AttributeType extends AveragePoolAttributes|MaxPoo

const generatePoolingCode = <AttributeType extends AveragePoolAttributes|MaxPoolAttributes>(
shaderHelper: ShaderHelper, x: IndicesHelper, rank: number, outputShapeRank: number, attributes: AttributeType,
- op1: string, op2: string, start: number, uniforms: UniformsArrayType, hasPads: boolean, pwStartEnd: boolean,
- phStartEnd: boolean): string => {
+ op1: string, op2: string, start: number, uniforms: UniformsArrayType, hasPads: boolean, pwStartEndNotZero: boolean,
+ phStartEndNotZero: boolean): string => {
const isChannelsLast = attributes.format === 'NHWC';
const dataType = x.type.value;
const output = outputVariable('output', x.type.tensor, outputShapeRank);
@@ -121,7 +121,7 @@ const generatePoolingCode = <AttributeType extends AveragePoolAttributes|MaxPool
let codeH = '';
let codeHEnd = '';
const dimIdxW = rank - (isChannelsLast ? 2 : 1);
- if (pwStartEnd === true) {
+ if (pwStartEndNotZero) {
codeW = `
for (var i: u32 = 0u; i < uniforms.kw; i++) {
xIndices[${dimIdxW}] = indices[${dimIdxW}] * uniforms.sw - uniforms.pwStart + i;
Expand All @@ -144,7 +144,7 @@ const generatePoolingCode = <AttributeType extends AveragePoolAttributes|MaxPool

if (attributes.kernelShape.length === 2) {
const dimIdxH = rank - (isChannelsLast ? 3 : 2);
- if (phStartEnd === true) {
+ if (phStartEndNotZero) {
codeH = `
for (var j: u32 = 0u; j < uniforms.kh; j++) {
xIndices[${dimIdxH}] = indices[${dimIdxH}] * uniforms.sh - uniforms.phStart + j;
@@ -258,6 +258,15 @@ export interface PoolCommonAttributes extends FormatAttributes {
readonly pads: readonly number[];
}

+ const createShaderKeyFromAttributes = (attributes: PoolCommonAttributes): string =>
+     (`${attributes.format};${attributes.ceilMode};${attributes.autoPad};${attributes.kernelShape.length}`);
+
+ const createAveragePoolShaderKeyFromAttributes = (attributes: AveragePoolAttributes): string =>
+     (`${createShaderKeyFromAttributes(attributes)};${attributes.countIncludePad}`);
+
+ const createMaxPoolShaderKeyFromAttributes = (attributes: MaxPoolAttributes): string =>
+     (`${createShaderKeyFromAttributes(attributes)};${attributes.storageOrder};${attributes.dilations}`);
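A self-contained sketch of the kind of key the new helpers above produce, using simplified stand-in types and made-up attribute values (the real `PoolCommonAttributes` and `AveragePoolAttributes` interfaces are defined elsewhere in pool.ts):

```typescript
// Simplified stand-ins for the real attribute interfaces, for illustration only.
interface PoolAttrsSketch {
  format: 'NHWC' | 'NCHW';
  ceilMode: number;
  autoPad: string;
  kernelShape: readonly number[];
  countIncludePad: boolean;
}

// Mirrors createShaderKeyFromAttributes / createAveragePoolShaderKeyFromAttributes above.
const commonKey = (a: PoolAttrsSketch): string =>
    `${a.format};${a.ceilMode};${a.autoPad};${a.kernelShape.length}`;
const averagePoolKey = (a: PoolAttrsSketch): string => `${commonKey(a)};${a.countIncludePad}`;

const attrs: PoolAttrsSketch = {
  format: 'NHWC',
  ceilMode: 0,
  autoPad: 'NOTSET',
  kernelShape: [3, 3],
  countIncludePad: false,
};

console.log(averagePoolKey(attrs));  // "NHWC;0;NOTSET;2;false"
```

Only attributes that affect the generated shader text go into the key; the kernel sizes themselves are passed as uniforms (see `kw`/`kh` above), which is presumably why `kernelShape` contributes only its length.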

const parsePoolCommonAttributes = (attributes: Record<string, unknown>): PoolCommonAttributes => ({
format: attributes.format as FormatAttributes['format'],
autoPad: ['NOTSET', 'VALID', 'SAME_UPPER', 'SAME_LOWER'][attributes.auto_pad as number],
@@ -285,25 +294,22 @@ const createAveragePoolProgramInfo =
} else {
op2 += `value /= ${dataType}(i32(uniforms.kernelSize) - pad);`;
}
- const [programUniforms, uniforms, hasPads, pwStartEnd, phStartEnd] =
+ const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] =
getUniformAndPadInfo(outputShape, adjustedAttributes);
- programUniforms.push(...createTensorShapeVariables(input.dims));
- programUniforms.push(...createTensorShapeVariables(outputShape));
+ programUniforms.push(...createTensorShapeVariables(input.dims), ...createTensorShapeVariables(outputShape));
const inputDependencies: ProgramInputTensorInfoDependency[] = ['rank'];
return {
name,
- shaderCache: {
- hint: attributes.cacheKey + hasPads + pwStartEnd + phStartEnd + adjustedAttributes.countIncludePad,
- inputDependencies
- },
+ shaderCache:
+ {hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies},
getRunData: () => ({
outputs: [{dims: outputShape, dataType: input.dataType}],
dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)},
programUniforms
}),
getShaderSource: shaderHelper => generatePoolingCode(
shaderHelper, x, input.dims.length, outputShape.length, adjustedAttributes, op1, op2, 0.0, uniforms,
- hasPads, pwStartEnd, phStartEnd),
+ hasPads, pwStartEndNotZero, phStartEndNotZero),
};
};
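Both pooling program infos now build the shader-cache hint as an explicitly `;`-delimited template string rather than `+`-concatenating values onto the cache key. A small sketch of the difference in the resulting strings, with made-up fragment values (not repository code):

```typescript
// Made-up fragments standing in for attributes.cacheKey and the pad flags.
const cacheKey = 'NHWC;0;NOTSET;2;false';
const hasPads = true, pwStartEndNotZero = true, phStartEndNotZero = false;

// Old style: '+' concatenation runs the fields together.
const oldHint = cacheKey + hasPads + pwStartEndNotZero + phStartEndNotZero;
// "NHWC;0;NOTSET;2;falsetruetruefalse"

// New style: every field is separated, so the hint stays readable and easy to extend.
const newHint = `${cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`;
// "NHWC;0;NOTSET;2;false;true;true;false"

console.log(oldHint, newHint);
```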

@@ -315,8 +321,8 @@ export const parseAveragePoolAttributes = (attributes: Record<string, unknown>):
if (attr.ceilMode !== 0) {
throw new Error('using ceil() in shape computation is not yet supported for AveragePool');
}

- return createAttributeWithCacheKey({countIncludePad, ...attr});
+ const averagePoolAttributes = {countIncludePad, ...attr, cacheKey: ''};
+ return {...averagePoolAttributes, cacheKey: createAveragePoolShaderKeyFromAttributes(averagePoolAttributes)};
};

export const averagePool = (context: ComputeContext, attributes: AveragePoolAttributes): void => {
@@ -332,8 +338,7 @@ const globalPoolAttributes = {
strides: [],
pads: [],
storageOrder: 0,
- dilations: [],
- cacheKey: ''
+ dilations: []
};

export const parseGlobalAveragePoolAttributes = (attributes: Record<string, unknown>): AveragePoolAttributes => {
@@ -361,21 +366,21 @@ const createMaxPoolProgramInfo =
const op2 = '';
const x = inputVariable('x', input.dataType, input.dims.length);
const inputDependencies: ProgramInputTensorInfoDependency[] = ['rank'];
- const [programUniforms, uniforms, hasPads, pwStartEnd, phStartEnd] =
+ const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] =
getUniformAndPadInfo(outputShape, adjustedAttributes);
- programUniforms.push(...createTensorShapeVariables(input.dims));
- programUniforms.push(...createTensorShapeVariables(outputShape));
+ programUniforms.push(...createTensorShapeVariables(input.dims), ...createTensorShapeVariables(outputShape));
return {
name,
- shaderCache: {hint: attributes.cacheKey + hasPads, inputDependencies},
+ shaderCache:
+ {hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies},
getRunData: () => ({
outputs: [{dims: outputShape, dataType: input.dataType}],
dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)},
programUniforms
}),
getShaderSource: shaderHelper => generatePoolingCode(
shaderHelper, x, input.dims.length, outputShape.length, adjustedAttributes, op1, op2, -1e5, uniforms,
- hasPads, pwStartEnd, phStartEnd),
+ hasPads, pwStartEndNotZero, phStartEndNotZero),
};
};

@@ -396,8 +401,8 @@ export const parseMaxPoolAttributes = (attributes: Record<string, unknown>): Max
if (attr.ceilMode !== 0) {
throw new Error('using ceil() in shape computation is not yet supported for MaxPool');
}

- return createAttributeWithCacheKey({storageOrder, dilations, ...attr});
+ const maxPoolAttributes = {storageOrder, dilations, ...attr, cacheKey: ''};
+ return {...maxPoolAttributes, cacheKey: createMaxPoolShaderKeyFromAttributes(maxPoolAttributes)};
};

export const parseGlobalMaxPoolAttributes = (attributes: Record<string, unknown>): MaxPoolAttributes => {
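For reference, the `pwStartEndNotZero` / `phStartEndNotZero` flags renamed in this file are truthiness tests on the summed begin/end pads of an axis, i.e. "does this axis have any padding at all". A standalone sketch with hypothetical pads arrays in ONNX order `[hBegin, wBegin, hEnd, wEnd]`:

```typescript
// Hypothetical pads arrays, illustration only.
const padsNone = [0, 0, 0, 0];
const padsWidthOnly = [0, 1, 0, 1];

// Mirrors the pattern used in getUniformAndPadInfo: is there any padding on the last (width) axis?
const widthHasPadding = (pads: number[]): boolean =>
    !!(pads[pads.length / 2 - 1] + pads[pads.length - 1]);

console.log(widthHasPadding(padsNone));      // false: total width padding is zero
console.log(widthHasPadding(padsWidthOnly)); // true: begin or end padding is non-zero
```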