Skip to content

Commit

Permalink
[js/web] support external data in npm test (#19377)
Browse files Browse the repository at this point in the history
### Description
Support external data in npm test.

This allows the test runner to detect whether external data is available
in the test folder and, if it is, to load it as external data
automatically.

This feature does not parse every model to figure out whether the model
has external data. The following code comment explains how the test runner
decides whether it should parse the model file.

```js
      // for performance consideration, we do not parse every model. when we think it's likely to have external
      // data, we will parse it. We think it's "likely" when one of the following conditions is met:
      // 1. any file in the same folder has the similar file name as the model file
      //    (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
      // 2. the file size is larger than 1GB
```
  • Loading branch information
fs-eire authored Feb 2, 2024
1 parent efc17e7 commit 50806a7
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 6 deletions.
49 changes: 48 additions & 1 deletion js/web/script/test-runner-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as os from 'os';
import * as path from 'path';
import {inspect} from 'util';

import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
import {bufferToBase64} from '../test/test-shared';
import {Test} from '../test/test-types';

Expand Down Expand Up @@ -264,10 +265,12 @@ async function main() {

let modelUrl: string|null = null;
let cases: Test.ModelTestCase[] = [];
let externalData: Array<{data: string; path: string}>|undefined;

npmlog.verbose('TestRunnerCli.Init.Model', `Start to prepare test data from folder: ${testDataRootFolder}`);

try {
const maybeExternalDataFiles: Array<[fileNameWithoutExtension: string, size: number]> = [];
for (const thisPath of fs.readdirSync(testDataRootFolder)) {
const thisFullPath = path.join(testDataRootFolder, thisPath);
const stat = fs.lstatSync(thisFullPath);
Expand All @@ -282,6 +285,8 @@ async function main() {
} else {
throw new Error('there are multiple model files under the folder specified');
}
} else {
maybeExternalDataFiles.push([path.parse(thisPath).name, stat.size]);
}
} else if (stat.isDirectory()) {
const dataFiles: string[] = [];
Expand All @@ -307,6 +312,34 @@ async function main() {
if (modelUrl === null) {
throw new Error('there are no model file under the folder specified');
}
// for performance consideration, we do not parse every model. when we think it's likely to have external
// data, we will parse it. We think it's "likely" when one of the following conditions is met:
// 1. any file in the same folder has the similar file name as the model file
// (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
// 2. the file size is larger than 1GB
const likelyToHaveExternalData = maybeExternalDataFiles.some(
([fileNameWithoutExtension, size]) =>
path.basename(modelUrl!).startsWith(fileNameWithoutExtension) || size >= 1 * 1024 * 1024 * 1024);
if (likelyToHaveExternalData) {
const model = onnx.ModelProto.decode(fs.readFileSync(path.join(testDataRootFolder, path.basename(modelUrl!))));
const externalDataPathSet = new Set<string>();
for (const initializer of model.graph!.initializer!) {
if (initializer.externalData) {
for (const data of initializer.externalData) {
if (data.key === 'location') {
externalDataPathSet.add(data.value!);
}
}
}
}
externalData = [];
const externalDataPaths = [...externalDataPathSet];
for (const dataPath of externalDataPaths) {
const fullPath = path.resolve(testDataRootFolder, dataPath);
const url = path.join(TEST_DATA_BASE, path.relative(TEST_ROOT, fullPath));
externalData.push({data: url, path: dataPath});
}
}
} catch (e) {
npmlog.error('TestRunnerCli.Init.Model', `Failed to prepare test data. Error: ${inspect(e)}`);
throw e;
Expand Down Expand Up @@ -340,9 +373,23 @@ async function main() {
npmlog.verbose('TestRunnerCli.Init.Model', ` Model file: ${modelUrl}`);
npmlog.verbose('TestRunnerCli.Init.Model', ` Backend: ${backend}`);
npmlog.verbose('TestRunnerCli.Init.Model', ` Test set(s): ${cases.length} (${caseCount})`);
if (externalData) {
npmlog.verbose('TestRunnerCli.Init.Model', ` External data: ${externalData.length}`);
for (const data of externalData) {
npmlog.verbose('TestRunnerCli.Init.Model', ` - ${data.path}`);
}
}
npmlog.verbose('TestRunnerCli.Init.Model', '===============================================================');

return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases, ioBinding};
return {
name: path.basename(testDataRootFolder),
platformCondition,
modelUrl,
backend,
cases,
ioBinding,
externalData
};
}

function tryLocateModelTestFolder(searchPattern: string): string {
Expand Down
11 changes: 6 additions & 5 deletions js/web/test/test-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ async function loadTensors(

async function initializeSession(
modelFilePath: string, backendHint: ort.InferenceSession.ExecutionProviderConfig, ioBindingMode: Test.IOBindingMode,
profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions,
fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
profile: boolean, externalData: ort.InferenceSession.SessionOptions['externalData'],
sessionOptions: ort.InferenceSession.SessionOptions, fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
const preloadModelData: Uint8Array|undefined =
fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined;
Logger.verbose(
Expand All @@ -153,7 +153,8 @@ async function initializeSession(
executionProviders: [backendHint],
profiler: profilerConfig,
enableProfiling: profile,
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
externalData
};

let session: ort.InferenceSession;
Expand Down Expand Up @@ -246,8 +247,8 @@ export class ModelTestContext {
const executionProviderConfig =
modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
const session = await initializeSession(
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, testOptions?.sessionOptions || {},
this.cache);
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData,
testOptions?.sessionOptions || {}, this.cache);

const initEnd = now();

Expand Down
1 change: 1 addition & 0 deletions js/web/test/test-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export declare namespace Test {
export interface ModelTest {
name: string;
modelUrl: string;
externalData?: InferenceSession.SessionOptions['externalData'];
backend?: string; // value should be populated at build time
ioBinding: IOBindingMode;
platformCondition?: PlatformCondition;
Expand Down

0 comments on commit 50806a7

Please sign in to comment.