diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 86cf79f7a6dc..d86598124573 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -451,6 +451,14 @@ "Whois", "whois" ] + }, + { + "filename": "sdk/documentintelligence/ai-document-intelligence-rest/review/ai-document-intelligence.api.md", + "words": [ + "wordprocessingml", + "spreadsheetml", + "presentationml" + ] } ] } diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index e04e807c729b..ed856921ae45 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -17,6 +17,9 @@ dependencies: '@rush-temp/ai-content-safety': specifier: file:./projects/ai-content-safety.tgz version: file:projects/ai-content-safety.tgz + '@rush-temp/ai-document-intelligence': + specifier: file:./projects/ai-document-intelligence.tgz + version: file:projects/ai-document-intelligence.tgz '@rush-temp/ai-document-translator': specifier: file:./projects/ai-document-translator.tgz version: file:projects/ai-document-translator.tgz @@ -4437,7 +4440,7 @@ packages: dependencies: semver: 7.5.4 shelljs: 0.8.5 - typescript: 5.4.0-dev.20231113 + typescript: 5.4.0-dev.20231116 dev: false /downlevel-dts@0.11.0: @@ -4446,7 +4449,7 @@ packages: dependencies: semver: 7.5.4 shelljs: 0.8.5 - typescript: 5.4.0-dev.20231113 + typescript: 5.4.0-dev.20231116 dev: false /eastasianwidth@0.2.0: @@ -9160,8 +9163,8 @@ packages: hasBin: true dev: false - /typescript@5.4.0-dev.20231113: - resolution: {integrity: sha512-5K165L/tImARYZDKwwT2ER9qKt0n56E8jxldXfAVpq8qNqX5o2SvpoPrzCi+eddkHJHl1gPf26xiE+7R6//1Gg==} + /typescript@5.4.0-dev.20231116: + resolution: {integrity: sha512-arT/MwHnosyqo1MQ2xN9VzwvWo1jxrhUpa1iQX1yZACfBRJb1dQ1D2MU/Ijotjr6eXw8BZ3RFdj9X/9O56iOgw==} engines: {node: '>=14.17'} hasBin: true dev: false @@ -9867,6 +9870,51 @@ packages: - utf-8-validate dev: false + file:projects/ai-document-intelligence.tgz: + resolution: {integrity: 
sha512-Qs9epDNNxDRI2Ev+9mXhldjZtuxu80fhX6ZrPf54udK+yVmrsrUHTpXi/v30YP9X0/58+IvgOq79HWlhbcW39w==, tarball: file:projects/ai-document-intelligence.tgz} + name: '@rush-temp/ai-document-intelligence' + version: 0.0.0 + dependencies: + '@azure/identity': 3.4.1 + '@microsoft/api-extractor': 7.38.3(@types/node@18.18.9) + '@types/chai': 4.3.10 + '@types/mocha': 10.0.4 + '@types/node': 18.18.9 + autorest: 3.6.3 + c8: 8.0.1 + chai: 4.3.10 + cross-env: 7.0.3 + dotenv: 16.3.1 + eslint: 8.53.0 + esm: 3.2.25 + karma: 6.4.2(debug@4.3.4) + karma-chrome-launcher: 3.2.0 + karma-coverage: 2.2.1 + karma-env-preprocessor: 0.1.1 + karma-firefox-launcher: 2.1.2 + karma-junit-reporter: 2.0.1(karma@6.4.2) + karma-mocha: 2.0.1 + karma-mocha-reporter: 2.2.5(karma@6.4.2) + karma-source-map-support: 1.4.0 + karma-sourcemap-loader: 0.4.0 + mkdirp: 2.1.6 + mocha: 10.2.0 + mocha-junit-reporter: 1.23.3(mocha@10.2.0) + prettier: 2.8.8 + rimraf: 5.0.5 + source-map-support: 0.5.21 + ts-node: 10.9.1(@types/node@18.18.9)(typescript@5.2.2) + tslib: 2.6.2 + typescript: 5.2.2 + transitivePeerDependencies: + - '@swc/core' + - '@swc/wasm' + - bufferutil + - debug + - supports-color + - utf-8-validate + dev: false + file:projects/ai-document-translator.tgz: resolution: {integrity: sha512-cRdyJLKq2E9Ru/00Gg40qro4VudDSN7oGJ6fziGwRA55Ho/lTH1Dq0KNBMQYuflA5mSJEUtThiNZaOZ1Qv7lOg==, tarball: file:projects/ai-document-translator.tgz} name: '@rush-temp/ai-document-translator' @@ -19095,7 +19143,7 @@ packages: dev: false file:projects/notification-hubs.tgz: - resolution: {integrity: sha512-reelA1qEW+iEwulyW+eEAvwqoXCIgKkRJ/Hix2lpAPdCC2WVan3zqMloP/hptMSnx9dv5X88uORVt5Hrn5+obA==, tarball: file:projects/notification-hubs.tgz} + resolution: {integrity: sha512-z2/A8lNpxYXE77kmOhmAxAEOehHra4Fvtfnlia7rNwlq+UNTlinS3FJylQQyTMOJV/ubJk/BDv8hyxFs+o1vOw==, tarball: file:projects/notification-hubs.tgz} name: '@rush-temp/notification-hubs' version: 0.0.0 dependencies: diff --git a/rush.json b/rush.json index 
da234b0c2be2..2a4815a0bbb3 100644 --- a/rush.json +++ b/rush.json @@ -310,6 +310,11 @@ "projectFolder": "sdk/agrifood/agrifood-farming-rest", "versionPolicyName": "client" }, + { + "packageName": "@azure-rest/ai-document-intelligence", + "projectFolder": "sdk/documentintelligence/ai-document-intelligence-rest", + "versionPolicyName": "client" + }, { "packageName": "@azure-rest/purview-administration", "projectFolder": "sdk/purview/purview-administration-rest", diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/.eslintrc.json b/sdk/documentintelligence/ai-document-intelligence-rest/.eslintrc.json new file mode 100644 index 000000000000..619797ac39b6 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/.eslintrc.json @@ -0,0 +1,11 @@ +{ + "plugins": ["@azure/azure-sdk"], + "extends": ["plugin:@azure/azure-sdk/azure-sdk-base"], + "rules": { + "@azure/azure-sdk/ts-modules-only-named": "warn", + "@azure/azure-sdk/ts-apiextractor-json-types": "warn", + "@azure/azure-sdk/ts-package-json-types": "warn", + "@azure/azure-sdk/ts-package-json-engine-is-present": "warn", + "tsdoc/syntax": "warn" + } +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/CHANGELOG.md b/sdk/documentintelligence/ai-document-intelligence-rest/CHANGELOG.md new file mode 100644 index 000000000000..eb9a2f44089f --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/CHANGELOG.md @@ -0,0 +1,8 @@ +# Release History + +## 1.0.0-beta.1 (2023-11-16) + +### Features Added + +This marks the first preview of `@azure-rest/ai-document-intelligence` Rest Level Client Library for the Azure AI Document Intelligence service (formerly known as Form Recognizer). +Please refer to the [Readme](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/README.md) and samples for more details. 
diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/README.md b/sdk/documentintelligence/ai-document-intelligence-rest/README.md new file mode 100644 index 000000000000..ae4dbf22c652 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/README.md @@ -0,0 +1,224 @@ +# Azure DocumentIntelligence (formerly FormRecognizer) REST client library for JavaScript + +Extracts content, layout, and structured data from documents. + +**Please rely heavily on our [REST client docs](https://github.com/Azure/azure-sdk-for-js/blob/main/documentation/rest-clients.md) to use this library** + +Key links: + +- [Source code](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest) +- [Package (NPM)](https://www.npmjs.com/package/@azure-rest/ai-document-intelligence) +- [Samples](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest/samples) + +## Getting started + +### Currently supported environments + +- LTS versions of Node.js + +### Prerequisites + +- You must have an [Azure subscription](https://azure.microsoft.com/free/) to use this package. + +### Install the `@azure-rest/ai-document-intelligence` package + +Install the Azure DocumentIntelligence(formerlyFormRecognizer) REST client REST client library for JavaScript with `npm`: + +```bash +npm install @azure-rest/ai-document-intelligence +``` + +### Create and authenticate a `DocumentIntelligenceClient` + +To use an [Azure Active Directory (AAD) token credential](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/identity/identity/samples/AzureIdentityExamples.md#authenticating-with-a-pre-fetched-access-token), +provide an instance of the desired credential type obtained from the +[@azure/identity](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/identity/identity#credentials) library. 
+ +To authenticate with AAD, you must first `npm` install [`@azure/identity`](https://www.npmjs.com/package/@azure/identity) + +After setup, you can choose which type of [credential](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/identity/identity#credentials) from `@azure/identity` to use. +As an example, [DefaultAzureCredential](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/identity/identity#defaultazurecredential) +can be used to authenticate the client. + +Set the values of the client ID, tenant ID, and client secret of the AAD application as environment variables: +AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET + +### Using a Token Credential + +```ts +import DocumentIntelligence from "@azure-rest/ai-document-intelligence"; + +const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"], + new DefaultAzureCredential() +); +``` + +### Using an API KEY + +```ts +import DocumentIntelligence from "@azure-rest/ai-document-intelligence"; + +const client = DocumentIntelligence(process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"], { + key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"], +}); +``` + +## Get Info + +```ts +const response = await client.path("/info").get(); +if (isUnexpected(response)) { + throw response.body.error; +} +console.log(response.body.customDocumentModels.limit); +// 20000 +``` + +## List Document Models + +```ts +import { paginate } from "@azure-rest/ai-document-intelligence"; +const response = await client.path("/documentModels").get(); +if (isUnexpected(response)) { + throw response.body.error; +} + +const modelsInAccount: string[] = []; +for await (const model of paginate(client, response)) { + console.log(model.modelId); +} +``` + +## Document Models + +### Analyze prebuilt-layout (urlSource) + +```ts +const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: + 
"https://raw.githubusercontent.com/Azure/azure-sdk-for-js/6704eff082aaaf2d97c1371a28461f512f8d748a/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + }, + queryParameters: { locale: "en-IN" }, + }); +``` + +### Analyze prebuilt-layout (base64Source) + +```ts +import fs from "fs"; +import path from "path"; + +const filePath = path.join(ASSET_PATH, "forms", "Invoice_1.pdf"); +const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); +const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); +``` + +Continue creating the poller from initial response + +```ts +import { + getLongRunningPoller, + AnalyzeResultOperationOutput, + isUnexpected, +} from "@azure-rest/ai-document-intelligence"; + +if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; +} +const poller = await getLongRunningPoller(client, initialResponse); +const result = (await poller.pollUntilDone()).body as AnalyzeResultOperationOutput; +console.log(result); +// { +// status: 'succeeded', +// createdDateTime: '2023-11-10T13:31:31Z', +// lastUpdatedDateTime: '2023-11-10T13:31:34Z', +// analyzeResult: { +// apiVersion: '2023-10-31-preview', +// . +// . +// . 
+// contentFormat: 'text' +// } +// } +``` + +## Document Classifiers #Build + +```ts +import { + DocumentClassifierBuildOperationDetailsOutput, + getLongRunningPoller, + isUnexpected, +} from "@azure-rest/ai-document-intelligence"; + +const containerSasUrl = (): string => + process.env["DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"]; +const initialResponse = await client.path("/documentClassifiers:build").post({ + body: { + classifierId: `customClassifier${getRandomNumber()}`, + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + bar: { + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }, + }, +}); + +if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; +} +const poller = await getLongRunningPoller(client, initialResponse); +const response = (await poller.pollUntilDone()) + .body as DocumentClassifierBuildOperationDetailsOutput; +console.log(response); +// { +// operationId: '31466834048_f3ee629e-73fb-48ab-993b-1d55d73ca460', +// kind: 'documentClassifierBuild', +// status: 'succeeded', +// . +// . +// result: { +// classifierId: 'customClassifier10978', +// createdDateTime: '2023-11-09T12:45:56Z', +// . +// . +// description: 'Custom classifier description' +// }, +// apiVersion: '2023-10-31-preview' +// } +``` + +## Troubleshooting + +### Logging + +Enabling logging may help uncover useful information about failures. In order to see a log of HTTP requests and responses, set the `AZURE_LOG_LEVEL` environment variable to `info`. Alternatively, logging can be enabled at runtime by calling `setLogLevel` in the `@azure/logger`: + +```javascript +const { setLogLevel } = require("@azure/logger"); + +setLogLevel("info"); +``` + +For more detailed instructions on how to enable logs, you can look at the [@azure/logger package docs](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/core/logger). 
diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/api-extractor.json b/sdk/documentintelligence/ai-document-intelligence-rest/api-extractor.json new file mode 100644 index 000000000000..cca13e14179e --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/api-extractor.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", + "mainEntryPointFilePath": "./types/src/index.d.ts", + "docModel": { + "enabled": true + }, + "apiReport": { + "enabled": true, + "reportFolder": "./review" + }, + "dtsRollup": { + "enabled": true, + "untrimmedFilePath": "", + "publicTrimmedFilePath": "./types/ai-document-intelligence.d.ts" + }, + "messages": { + "tsdocMessageReporting": { + "default": { + "logLevel": "none" + } + }, + "extractorMessageReporting": { + "ae-missing-release-tag": { + "logLevel": "none" + }, + "ae-unresolved-link": { + "logLevel": "none" + } + } + } +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets.json b/sdk/documentintelligence/ai-document-intelligence-rest/assets.json new file mode 100644 index 000000000000..58864a124070 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/assets.json @@ -0,0 +1,6 @@ +{ + "AssetsRepo": "Azure/azure-sdk-assets", + "AssetsRepoPrefixPath": "js", + "TagPrefix": "js/documentintelligence/ai-document-intelligence-rest", + "Tag": "js/documentintelligence/ai-document-intelligence-rest_e0c09ce1b6" +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/businessCard/business-card-english.jpg b/sdk/documentintelligence/ai-document-intelligence-rest/assets/businessCard/business-card-english.jpg new file mode 100644 index 000000000000..7c2bef76ed3e Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/businessCard/business-card-english.jpg differ diff --git 
a/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Form_1.jpg b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Form_1.jpg new file mode 100644 index 000000000000..29cae664f1b8 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Form_1.jpg differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.pdf b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.pdf new file mode 100644 index 000000000000..5ffff2960d74 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.pdf differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.tiff b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.tiff new file mode 100644 index 000000000000..224fb82205b0 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/Invoice_1.tiff differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/selection_mark_form.pdf b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/selection_mark_form.pdf new file mode 100644 index 000000000000..0721647fa52b Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/forms/selection_mark_form.pdf differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/healthInsuranceCard/insurance.png b/sdk/documentintelligence/ai-document-intelligence-rest/assets/healthInsuranceCard/insurance.png new file mode 100644 index 000000000000..676c8a231d1e Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/healthInsuranceCard/insurance.png differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/identityDocument/license.png 
b/sdk/documentintelligence/ai-document-intelligence-rest/assets/identityDocument/license.png new file mode 100644 index 000000000000..661312305a45 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/identityDocument/license.png differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/Invoice_1.pdf b/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/Invoice_1.pdf new file mode 100644 index 000000000000..5ffff2960d74 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/Invoice_1.pdf differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/sample_invoice.jpg b/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/sample_invoice.jpg new file mode 100644 index 000000000000..6f8796469d78 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/invoice/sample_invoice.jpg differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-allinone.jpg b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-allinone.jpg new file mode 100644 index 000000000000..1aaad34387ec Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-allinone.jpg differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-receipt.png b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-receipt.png new file mode 100644 index 000000000000..1f9fcbf60a04 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/contoso-receipt.png differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/multipage_invoice1.pdf b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/multipage_invoice1.pdf new file mode 100644 index 
000000000000..5ac1edf588c9 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/receipt/multipage_invoice1.pdf differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/vaccinationCard/vaccination.jpg b/sdk/documentintelligence/ai-document-intelligence-rest/assets/vaccinationCard/vaccination.jpg new file mode 100644 index 000000000000..4b5f7d10dedc Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/vaccinationCard/vaccination.jpg differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-multiple.png b/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-multiple.png new file mode 100644 index 000000000000..df2af007a02c Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-multiple.png differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-single.png b/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-single.png new file mode 100644 index 000000000000..3d282db03149 Binary files /dev/null and b/sdk/documentintelligence/ai-document-intelligence-rest/assets/w2/w2-single.png differ diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/karma.conf.js b/sdk/documentintelligence/ai-document-intelligence-rest/karma.conf.js new file mode 100644 index 000000000000..a9d5f1b5fc59 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/karma.conf.js @@ -0,0 +1,133 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +// https://github.com/karma-runner/karma-chrome-launcher +process.env.CHROME_BIN = require("puppeteer").executablePath(); +require("dotenv").config(); +const { relativeRecordingsPath } = require("@azure-tools/test-recorder"); +process.env.RECORDINGS_RELATIVE_PATH = relativeRecordingsPath(); + +module.exports = function (config) { + config.set({ + // base path that will be used to resolve all patterns (eg. files, exclude) + basePath: "./", + + // frameworks to use + // available frameworks: https://npmjs.org/browse/keyword/karma-adapter + frameworks: ["source-map-support", "mocha"], + + plugins: [ + "karma-mocha", + "karma-mocha-reporter", + "karma-chrome-launcher", + "karma-firefox-launcher", + "karma-env-preprocessor", + "karma-coverage", + "karma-sourcemap-loader", + "karma-junit-reporter", + "karma-source-map-support", + ], + + // list of files / patterns to load in the browser + files: [ + "dist-test/index.browser.js", + { + pattern: "dist-test/index.browser.js.map", + type: "html", + included: false, + served: true, + }, + ], + + // list of files / patterns to exclude + exclude: [], + + // preprocess matching files before serving them to the browser + // available preprocessors: https://npmjs.org/browse/keyword/karma-preprocessor + preprocessors: { + "**/*.js": ["sourcemap", "env"], + // IMPORTANT: COMMENT following line if you want to debug in your browsers!! 
+ // Preprocess source file to calculate code coverage, however this will make source file unreadable + // "dist-test/index.js": ["coverage"] + }, + + envPreprocessor: [ + "TEST_MODE", + "ENDPOINT", + "AZURE_CLIENT_SECRET", + "AZURE_CLIENT_ID", + "AZURE_TENANT_ID", + "SUBSCRIPTION_ID", + "RECORDINGS_RELATIVE_PATH", + ], + + // test results reporter to use + // possible values: 'dots', 'progress' + // available reporters: https://npmjs.org/browse/keyword/karma-reporter + reporters: ["mocha", "coverage", "junit"], + + coverageReporter: { + // specify a common output directory + dir: "coverage-browser/", + reporters: [ + { type: "json", subdir: ".", file: "coverage.json" }, + { type: "lcovonly", subdir: ".", file: "lcov.info" }, + { type: "html", subdir: "html" }, + { type: "cobertura", subdir: ".", file: "cobertura-coverage.xml" }, + ], + }, + + junitReporter: { + outputDir: "", // results will be saved as $outputDir/$browserName.xml + outputFile: "test-results.browser.xml", // if included, results will be saved as $outputDir/$browserName/$outputFile + suite: "", // suite will become the package name attribute in xml testsuite element + useBrowserName: false, // add browser name to report and classes names + nameFormatter: undefined, // function (browser, result) to customize the name attribute in xml testcase element + classNameFormatter: undefined, // function (browser, result) to customize the classname attribute in xml testcase element + properties: {}, // key value pair of properties to add to the section of the report + }, + + // web server port + port: 9876, + + // enable / disable colors in the output (reporters and logs) + colors: true, + + // level of logging + // possible values: config.LOG_DISABLE || config.LOG_ERROR || config.LOG_WARN || config.LOG_INFO || config.LOG_DEBUG + logLevel: config.LOG_INFO, + + // enable / disable watching file and executing tests whenever any file changes + autoWatch: false, + + // --no-sandbox allows our tests to run in 
Linux without having to change the system. + // --disable-web-security allows us to authenticate from the browser without having to write tests using interactive auth, which would be far more complex. + browsers: ["ChromeHeadlessNoSandbox"], + customLaunchers: { + ChromeHeadlessNoSandbox: { + base: "ChromeHeadless", + flags: ["--no-sandbox", "--disable-web-security"], + }, + }, + + // Continuous Integration mode + // if true, Karma captures browsers, runs the tests and exits + singleRun: false, + + // Concurrency level + // how many browser should be started simultaneous + concurrency: 1, + + browserNoActivityTimeout: 60000000, + browserDisconnectTimeout: 10000, + browserDisconnectTolerance: 3, + + client: { + mocha: { + // change Karma's debug.html to the mocha web reporter + reporter: "html", + timeout: "600000", + }, + }, + }); +}; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/package.json b/sdk/documentintelligence/ai-document-intelligence-rest/package.json new file mode 100644 index 000000000000..f231ae6e6a90 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/package.json @@ -0,0 +1,127 @@ +{ + "name": "@azure-rest/ai-document-intelligence", + "sdk-type": "client", + "author": "Microsoft Corporation", + "version": "1.0.0-beta.1", + "description": "Azure Document Intelligence Rest Client", + "keywords": [ + "node", + "azure", + "cloud", + "typescript", + "browser", + "isomorphic" + ], + "license": "MIT", + "main": "dist/index.js", + "module": "./dist-esm/src/index.js", + "types": "./types/ai-document-intelligence.d.ts", + "repository": "github:Azure/azure-sdk-for-js", + "bugs": { + "url": "https://github.com/Azure/azure-sdk-for-js/issues" + }, + "files": [ + "dist/", + "dist-esm/src/", + "types/ai-document-intelligence.d.ts", + "README.md", + "LICENSE", + "review/*" + ], + "engines": { + "node": ">=18.0.0" + }, + "scripts": { + "audit": "node ../../../common/scripts/rush-audit.js && rimraf node_modules 
package-lock.json && npm i --package-lock-only 2>&1 && npm audit", + "build:browser": "tsc -p . && cross-env ONLY_BROWSER=true rollup -c 2>&1", + "build:node": "tsc -p . && cross-env ONLY_NODE=true rollup -c 2>&1", + "build:samples": "echo skipped.", + "build:test": "tsc -p . && dev-tool run bundle", + "build:debug": "tsc -p . && dev-tool run bundle && api-extractor run --local", + "check-format": "prettier --list-different --config ../../../.prettierrc.json --ignore-path ../../../.prettierignore \"src/**/*.ts\" \"*.{js,json}\" \"test/**/*.ts\"", + "clean": "rimraf --glob dist dist-browser dist-esm test-dist temp types *.tgz *.log", + "execute:samples": "echo skipped", + "extract-api": "rimraf review && mkdirp ./review && api-extractor run --local", + "format": "prettier --write --config ../../../.prettierrc.json --ignore-path ../../../.prettierignore \"src/**/*.ts\" \"*.{js,json}\" \"test/**/*.ts\"", + "generate:client": "echo skipped", + "integration-test:browser": "echo skipped", + "integration-test:node": "dev-tool run test:node-js-input -- --timeout 5000000 'dist-esm/test/**/*.spec.js'", + "integration-test": "npm run integration-test:node && npm run integration-test:browser", + "lint:fix": "eslint package.json api-extractor.json src test --ext .ts --fix --fix-type [problem,suggestion]", + "lint": "eslint package.json api-extractor.json src test --ext .ts", + "pack": "npm pack 2>&1", + "test:browser": "echo skipped", + "test:node": "npm run clean && npm run build:test && npm run unit-test:node", + "test": "npm run clean && npm run build:test && npm run unit-test", + "unit-test": "npm run unit-test:node && npm run unit-test:browser", + "unit-test:node": "dev-tool run test:node-ts-input -- --timeout 1200000 --exclude 'test/**/browser/*.spec.ts' 'test/**/*.spec.ts'", + "unit-test:browser": "echo skipped", + "build": "npm run clean && tsc -p . 
&& dev-tool run bundle && mkdirp ./review && api-extractor run --local" + }, + "sideEffects": false, + "autoPublish": false, + "dependencies": { + "@azure/core-auth": "^1.3.0", + "@azure-rest/core-client": "^1.1.4", + "@azure/core-rest-pipeline": "^1.12.0", + "@azure/logger": "^1.0.0", + "tslib": "^2.2.0", + "@azure/core-paging": "^1.5.0", + "@azure/core-lro": "^2.5.4", + "@azure/abort-controller": "^1.0.0" + }, + "devDependencies": { + "@microsoft/api-extractor": "^7.31.1", + "autorest": "latest", + "@types/node": "^18.0.0", + "dotenv": "^16.0.0", + "eslint": "^8.0.0", + "mkdirp": "^2.1.2", + "prettier": "^2.5.1", + "rimraf": "^5.0.0", + "source-map-support": "^0.5.9", + "typescript": "~5.2.0", + "@azure/dev-tool": "^1.0.0", + "@azure/eslint-plugin-azure-sdk": "^3.0.0", + "@azure-tools/test-credential": "^1.0.0", + "@azure/identity": "^3.3.0", + "@azure-tools/test-recorder": "^3.0.0", + "mocha": "^10.0.0", + "esm": "^3.2.18", + "@types/mocha": "^10.0.0", + "mocha-junit-reporter": "^1.18.0", + "cross-env": "^7.0.2", + "@types/chai": "^4.2.8", + "chai": "^4.2.0", + "karma-chrome-launcher": "^3.0.0", + "karma-coverage": "^2.0.0", + "karma-env-preprocessor": "^0.1.1", + "karma-firefox-launcher": "^2.1.2", + "karma-junit-reporter": "^2.0.1", + "karma-mocha-reporter": "^2.2.5", + "karma-mocha": "^2.0.1", + "karma-source-map-support": "~1.4.0", + "karma-sourcemap-loader": "^0.4.0", + "karma": "^6.2.0", + "c8": "^8.0.0", + "ts-node": "^10.0.0" + }, + "homepage": "https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest/README.md", + "//metadata": { + "constantPaths": [ + { + "path": "src/documentIntelligence.ts", + "prefix": "userAgentInfo" + } + ] + }, + "//sampleConfiguration": { + "productName": "Azure Document Intelligence Rest Client", + "productSlugs": [ + "azure", + "document-intelligence", + "ai-document-intelligence" + ], + "disableDocsMs": true + } +} diff --git 
a/sdk/documentintelligence/ai-document-intelligence-rest/review/ai-document-intelligence.api.md b/sdk/documentintelligence/ai-document-intelligence-rest/review/ai-document-intelligence.api.md new file mode 100644 index 000000000000..321c47d68ca1 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/review/ai-document-intelligence.api.md @@ -0,0 +1,1411 @@ +## API Report File for "@azure-rest/ai-document-intelligence" + +> Do not edit this file. It is a report generated by [API Extractor](https://api-extractor.com/). + +```ts + +/// + +import { Client } from '@azure-rest/core-client'; +import { ClientOptions } from '@azure-rest/core-client'; +import { CreateHttpPollerOptions } from '@azure/core-lro'; +import { HttpResponse } from '@azure-rest/core-client'; +import { KeyCredential } from '@azure/core-auth'; +import { OperationState } from '@azure/core-lro'; +import { Paged } from '@azure/core-paging'; +import { PagedAsyncIterableIterator } from '@azure/core-paging'; +import { PathUncheckedResponse } from '@azure-rest/core-client'; +import { RawHttpHeaders } from '@azure/core-rest-pipeline'; +import { RequestParameters } from '@azure-rest/core-client'; +import { SimplePollerLike } from '@azure/core-lro'; +import { StreamableMethod } from '@azure-rest/core-client'; +import { TokenCredential } from '@azure/core-auth'; + +// @public +export interface AddressValueOutput { + city?: string; + cityDistrict?: string; + countryRegion?: string; + house?: string; + houseNumber?: string; + level?: string; + poBox?: string; + postalCode?: string; + road?: string; + state?: string; + stateDistrict?: string; + streetAddress?: string; + suburb?: string; + unit?: string; +} + +// @public (undocumented) +export interface AnalyzeDocument202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface AnalyzeDocument202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & AnalyzeDocument202Headers; + // 
(undocumented) + status: "202"; +} + +// @public (undocumented) +export interface AnalyzeDocumentBodyParam { + body?: AnalyzeDocumentRequest; +} + +// @public (undocumented) +export interface AnalyzeDocumentDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStream { + post(options: AnalyzeDocumentFromStreamParameters): StreamableMethod; + post(options: AnalyzeDocumentParameters): StreamableMethod; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStream202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface AnalyzeDocumentFromStream202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & AnalyzeDocumentFromStream202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStreamBodyParam { + body: string | Uint8Array | ReadableStream | NodeJS.ReadableStream; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStreamDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface AnalyzeDocumentFromStreamLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStreamMediaTypesParam { + contentType: "application/octet-stream" | "application/pdf" | "image/jpeg" | "image/png" | "image/tiff" | "image/bmp" | "image/heif" | "text/html" | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.openxmlformats-officedocument.presentationml.presentation"; +} + +// @public (undocumented) +export type AnalyzeDocumentFromStreamParameters = AnalyzeDocumentFromStreamQueryParam & 
AnalyzeDocumentFromStreamMediaTypesParam & AnalyzeDocumentFromStreamBodyParam & RequestParameters; + +// @public (undocumented) +export interface AnalyzeDocumentFromStreamQueryParam { + // (undocumented) + queryParameters?: AnalyzeDocumentFromStreamQueryParamProperties; +} + +// @public (undocumented) +export interface AnalyzeDocumentFromStreamQueryParamProperties { + features?: string[]; + locale?: string; + outputContentFormat?: string; + pages?: string; + queryFields?: string[]; + stringIndexType?: string; +} + +// @public +export interface AnalyzeDocumentLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface AnalyzeDocumentMediaTypesParam { + contentType: "application/json"; +} + +// @public (undocumented) +export type AnalyzeDocumentParameters = AnalyzeDocumentQueryParam & AnalyzeDocumentMediaTypesParam & AnalyzeDocumentBodyParam & RequestParameters; + +// @public (undocumented) +export interface AnalyzeDocumentQueryParam { + // (undocumented) + queryParameters?: AnalyzeDocumentQueryParamProperties; +} + +// @public (undocumented) +export interface AnalyzeDocumentQueryParamProperties { + features?: string[]; + locale?: string; + outputContentFormat?: string; + pages?: string; + queryFields?: string[]; + stringIndexType?: string; +} + +// @public +export interface AnalyzeDocumentRequest { + base64Source?: string; + urlSource?: string; +} + +// @public +export interface AnalyzeResultOperationOutput { + analyzeResult?: AnalyzeResultOutput; + createdDateTime: string; + error?: ErrorModelOutput; + lastUpdatedDateTime: string; + status: string; +} + +// @public +export interface AnalyzeResultOutput { + apiVersion: string; + content: string; + contentFormat?: string; + documents?: Array; + figures?: Array; + keyValuePairs?: Array; + languages?: Array; + lists?: Array; + modelId: string; + pages: Array; + paragraphs?: Array; + sections?: Array; + stringIndexType: string; + styles?: Array; + 
tables?: Array; +} + +// @public +export interface AuthorizeCopyRequest { + description?: string; + modelId: string; + tags?: Record; +} + +// @public (undocumented) +export interface AuthorizeModelCopy { + post(options: AuthorizeModelCopyParameters): StreamableMethod; +} + +// @public +export interface AuthorizeModelCopy200Response extends HttpResponse { + // (undocumented) + body: CopyAuthorizationOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface AuthorizeModelCopyBodyParam { + body: AuthorizeCopyRequest; +} + +// @public (undocumented) +export interface AuthorizeModelCopyDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type AuthorizeModelCopyParameters = AuthorizeModelCopyBodyParam & RequestParameters; + +// @public +export interface AzureBlobContentSource { + containerUrl: string; + prefix?: string; +} + +// @public +export interface AzureBlobContentSourceOutput { + containerUrl: string; + prefix?: string; +} + +// @public +export interface AzureBlobFileListContentSource { + containerUrl: string; + fileList: string; +} + +// @public +export interface AzureBlobFileListContentSourceOutput { + containerUrl: string; + fileList: string; +} + +// @public +export interface BoundingRegionOutput { + pageNumber: number; + polygon: number[]; +} + +// @public (undocumented) +export interface BuildClassifier { + post(options: BuildClassifierParameters): StreamableMethod; +} + +// @public (undocumented) +export interface BuildClassifier202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface BuildClassifier202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & BuildClassifier202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface BuildClassifierBodyParam { + body: BuildDocumentClassifierRequest; +} + +// 
@public (undocumented) +export interface BuildClassifierDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface BuildClassifierLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export type BuildClassifierParameters = BuildClassifierBodyParam & RequestParameters; + +// @public +export interface BuildDocumentClassifierRequest { + classifierId: string; + description?: string; + docTypes: Record; +} + +// @public +export interface BuildDocumentModelRequest { + azureBlobFileListSource?: AzureBlobFileListContentSource; + azureBlobSource?: AzureBlobContentSource; + buildMode: string; + description?: string; + modelId: string; + tags?: Record; +} + +// @public (undocumented) +export interface BuildModel { + post(options: BuildModelParameters): StreamableMethod; +} + +// @public (undocumented) +export interface BuildModel202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface BuildModel202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & BuildModel202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface BuildModelBodyParam { + body: BuildDocumentModelRequest; +} + +// @public (undocumented) +export interface BuildModelDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface BuildModelLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export type BuildModelParameters = BuildModelBodyParam & RequestParameters; + +// @public +export interface ClassifierDocumentTypeDetails { + azureBlobFileListSource?: AzureBlobFileListContentSource; + azureBlobSource?: AzureBlobContentSource; + sourceKind?: string; +} + +// @public +export interface 
ClassifierDocumentTypeDetailsOutput { + azureBlobFileListSource?: AzureBlobFileListContentSourceOutput; + azureBlobSource?: AzureBlobContentSourceOutput; + sourceKind?: string; +} + +// @public (undocumented) +export interface ClassifyDocument202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface ClassifyDocument202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & ClassifyDocument202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface ClassifyDocumentBodyParam { + body: ClassifyDocumentRequest; +} + +// @public (undocumented) +export interface ClassifyDocumentDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStream { + post(options: ClassifyDocumentFromStreamParameters): StreamableMethod; + post(options: ClassifyDocumentParameters): StreamableMethod; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStream202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface ClassifyDocumentFromStream202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & ClassifyDocumentFromStream202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStreamBodyParam { + body: string | Uint8Array | ReadableStream | NodeJS.ReadableStream; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStreamDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface ClassifyDocumentFromStreamLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStreamMediaTypesParam { + contentType: 
"application/octet-stream" | "application/pdf" | "image/jpeg" | "image/png" | "image/tiff" | "image/bmp" | "image/heif" | "text/html" | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.openxmlformats-officedocument.presentationml.presentation"; +} + +// @public (undocumented) +export type ClassifyDocumentFromStreamParameters = ClassifyDocumentFromStreamQueryParam & ClassifyDocumentFromStreamMediaTypesParam & ClassifyDocumentFromStreamBodyParam & RequestParameters; + +// @public (undocumented) +export interface ClassifyDocumentFromStreamQueryParam { + // (undocumented) + queryParameters?: ClassifyDocumentFromStreamQueryParamProperties; +} + +// @public (undocumented) +export interface ClassifyDocumentFromStreamQueryParamProperties { + split?: string; + stringIndexType?: string; +} + +// @public +export interface ClassifyDocumentLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface ClassifyDocumentMediaTypesParam { + contentType: "application/json"; +} + +// @public (undocumented) +export type ClassifyDocumentParameters = ClassifyDocumentQueryParam & ClassifyDocumentMediaTypesParam & ClassifyDocumentBodyParam & RequestParameters; + +// @public (undocumented) +export interface ClassifyDocumentQueryParam { + // (undocumented) + queryParameters?: ClassifyDocumentQueryParamProperties; +} + +// @public (undocumented) +export interface ClassifyDocumentQueryParamProperties { + split?: string; + stringIndexType?: string; +} + +// @public +export interface ClassifyDocumentRequest { + base64Source?: string; + urlSource?: string; +} + +// @public +export interface ComponentDocumentModelDetails { + modelId: string; +} + +// @public +export interface ComposeDocumentModelRequest { + componentModels: Array; + description?: string; + modelId: string; + tags?: Record; +} + +// @public (undocumented) 
+export interface ComposeModel { + post(options: ComposeModelParameters): StreamableMethod; +} + +// @public (undocumented) +export interface ComposeModel202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface ComposeModel202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & ComposeModel202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface ComposeModelBodyParam { + body: ComposeDocumentModelRequest; +} + +// @public (undocumented) +export interface ComposeModelDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface ComposeModelLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export type ComposeModelParameters = ComposeModelBodyParam & RequestParameters; + +// @public +export interface CopyAuthorization { + accessToken: string; + expirationDateTime: Date | string; + targetModelId: string; + targetModelLocation: string; + targetResourceId: string; + targetResourceRegion: string; +} + +// @public +export interface CopyAuthorizationOutput { + accessToken: string; + expirationDateTime: string; + targetModelId: string; + targetModelLocation: string; + targetResourceId: string; + targetResourceRegion: string; +} + +// @public (undocumented) +export interface CopyModelTo { + post(options: CopyModelToParameters): StreamableMethod; +} + +// @public (undocumented) +export interface CopyModelTo202Headers { + // (undocumented) + "operation-location": string; +} + +// @public +export interface CopyModelTo202Response extends HttpResponse { + // (undocumented) + headers: RawHttpHeaders & CopyModelTo202Headers; + // (undocumented) + status: "202"; +} + +// @public (undocumented) +export interface CopyModelToBodyParam { + body: CopyAuthorization; +} + +// @public (undocumented) +export interface 
CopyModelToDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public +export interface CopyModelToLogicalResponse extends HttpResponse { + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export type CopyModelToParameters = CopyModelToBodyParam & RequestParameters; + +// @public +function createClient(endpoint: string, credentials: TokenCredential | KeyCredential, options?: ClientOptions): DocumentIntelligenceClient; +export default createClient; + +// @public +export interface CurrencyValueOutput { + amount: number; + currencyCode?: string; + currencySymbol?: string; +} + +// @public +export interface CustomDocumentModelsDetailsOutput { + count: number; + limit: number; +} + +// @public +export interface DeleteClassifier204Response extends HttpResponse { + // (undocumented) + status: "204"; +} + +// @public (undocumented) +export interface DeleteClassifierDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type DeleteClassifierParameters = RequestParameters; + +// @public +export interface DeleteModel204Response extends HttpResponse { + // (undocumented) + status: "204"; +} + +// @public (undocumented) +export interface DeleteModelDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type DeleteModelParameters = RequestParameters; + +// @public +export interface DocumentBarcodeOutput { + confidence: number; + kind: string; + polygon?: number[]; + span: DocumentSpanOutput; + value: string; +} + +// @public +export interface DocumentCaptionOutput { + boundingRegions?: Array; + content: string; + elements?: string[]; + spans: Array; +} + +// @public +export interface DocumentClassifierBuildOperationDetailsOutput extends 
OperationDetailsOutputParent { + kind: "documentClassifierBuild"; + result?: DocumentClassifierDetailsOutput; +} + +// @public +export interface DocumentClassifierDetailsOutput { + apiVersion: string; + classifierId: string; + createdDateTime: string; + description?: string; + docTypes: Record; + expirationDateTime?: string; +} + +// @public +export interface DocumentFieldOutput { + boundingRegions?: Array; + confidence?: number; + content?: string; + spans?: Array; + type: string; + valueAddress?: AddressValueOutput; + valueArray?: Array; + valueBoolean?: boolean; + valueCountryRegion?: string; + valueCurrency?: CurrencyValueOutput; + valueDate?: string; + valueInteger?: number; + valueNumber?: number; + valueObject?: Record; + valuePhoneNumber?: string; + valueSelectionMark?: string; + valueSignature?: string; + valueString?: string; + valueTime?: string; +} + +// @public +export interface DocumentFieldSchemaOutput { + description?: string; + example?: string; + items?: DocumentFieldSchemaOutput; + properties?: Record; + type: string; +} + +// @public +export interface DocumentFigureOutput { + boundingRegions?: Array; + caption?: DocumentCaptionOutput; + elements?: string[]; + footnotes?: Array; + spans: Array; +} + +// @public +export interface DocumentFootnoteOutput { + boundingRegions?: Array; + content: string; + elements?: string[]; + spans: Array; +} + +// @public +export interface DocumentFormulaOutput { + confidence: number; + kind: string; + polygon?: number[]; + span: DocumentSpanOutput; + value: string; +} + +// @public (undocumented) +export type DocumentIntelligenceClient = Client & { + path: Routes; +}; + +// @public +export interface DocumentKeyValueElementOutput { + boundingRegions?: Array; + content: string; + spans: Array; +} + +// @public +export interface DocumentKeyValuePairOutput { + confidence: number; + key: DocumentKeyValueElementOutput; + value?: DocumentKeyValueElementOutput; +} + +// @public +export interface DocumentLanguageOutput { + 
confidence: number; + locale: string; + spans: Array; +} + +// @public +export interface DocumentLineOutput { + content: string; + polygon?: number[]; + spans: Array; +} + +// @public +export interface DocumentListItemOutput { + boundingRegions?: Array; + content: string; + elements?: string[]; + level: number; + spans: Array; +} + +// @public +export interface DocumentListOutput { + items: Array; + spans: Array; +} + +// @public +export interface DocumentModelBuildOperationDetailsOutput extends OperationDetailsOutputParent { + kind: "documentModelBuild"; + result?: DocumentModelDetailsOutput; +} + +// @public +export interface DocumentModelComposeOperationDetailsOutput extends OperationDetailsOutputParent { + kind: "documentModelCompose"; + result?: DocumentModelDetailsOutput; +} + +// @public +export interface DocumentModelCopyToOperationDetailsOutput extends OperationDetailsOutputParent { + kind: "documentModelCopyTo"; + result?: DocumentModelDetailsOutput; +} + +// @public +export interface DocumentModelDetailsOutput { + apiVersion?: string; + azureBlobFileListSource?: AzureBlobFileListContentSourceOutput; + azureBlobSource?: AzureBlobContentSourceOutput; + buildMode?: string; + createdDateTime: string; + description?: string; + docTypes?: Record; + expirationDateTime?: string; + modelId: string; + tags?: Record; +} + +// @public +export interface DocumentOutput { + boundingRegions?: Array; + confidence: number; + docType: string; + fields?: Record; + spans: Array; +} + +// @public +export interface DocumentPageOutput { + angle?: number; + barcodes?: Array; + formulas?: Array; + height?: number; + lines?: Array; + pageNumber: number; + selectionMarks?: Array; + spans: Array; + unit?: string; + width?: number; + words?: Array; +} + +// @public +export interface DocumentParagraphOutput { + boundingRegions?: Array; + content: string; + role?: string; + spans: Array; +} + +// @public +export interface DocumentSectionOutput { + elements?: string[]; + spans: Array; 
+} + +// @public +export interface DocumentSelectionMarkOutput { + confidence: number; + polygon?: number[]; + span: DocumentSpanOutput; + state: string; +} + +// @public +export interface DocumentSpanOutput { + length: number; + offset: number; +} + +// @public +export interface DocumentStyleOutput { + backgroundColor?: string; + color?: string; + confidence: number; + fontStyle?: string; + fontWeight?: string; + isHandwritten?: boolean; + similarFontFamily?: string; + spans: Array; +} + +// @public +export interface DocumentTableCellOutput { + boundingRegions?: Array; + columnIndex: number; + columnSpan?: number; + content: string; + elements?: string[]; + kind?: string; + rowIndex: number; + rowSpan?: number; + spans: Array; +} + +// @public +export interface DocumentTableOutput { + boundingRegions?: Array; + caption?: DocumentCaptionOutput; + cells: Array; + columnCount: number; + footnotes?: Array; + rowCount: number; + spans: Array; +} + +// @public +export interface DocumentTypeDetailsOutput { + buildMode?: string; + description?: string; + fieldConfidence?: Record; + fieldSchema: Record; +} + +// @public +export interface DocumentWordOutput { + confidence: number; + content: string; + polygon?: number[]; + span: DocumentSpanOutput; +} + +// @public +export interface ErrorModelOutput { + code: string; + details?: Array; + innererror?: InnerErrorOutput; + message: string; + target?: string; +} + +// @public +export interface ErrorResponseOutput { + error: ErrorModelOutput; +} + +// @public (undocumented) +export interface GetAnalyzeResult { + get(options?: GetAnalyzeResultParameters): StreamableMethod; +} + +// @public +export interface GetAnalyzeResult200Response extends HttpResponse { + // (undocumented) + body: AnalyzeResultOperationOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetAnalyzeResultDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + 
status: string; +} + +// @public (undocumented) +export type GetAnalyzeResultParameters = RequestParameters; + +// @public +export type GetArrayType = T extends Array ? TData : never; + +// @public (undocumented) +export interface GetClassifier { + delete(options?: DeleteClassifierParameters): StreamableMethod; + get(options?: GetClassifierParameters): StreamableMethod; +} + +// @public +export interface GetClassifier200Response extends HttpResponse { + // (undocumented) + body: DocumentClassifierDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetClassifierDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetClassifierParameters = RequestParameters; + +// @public (undocumented) +export interface GetClassifyResult { + get(options?: GetClassifyResultParameters): StreamableMethod; +} + +// @public +export interface GetClassifyResult200Response extends HttpResponse { + // (undocumented) + body: AnalyzeResultOperationOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetClassifyResultDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetClassifyResultParameters = RequestParameters; + +// @public +export interface GetDocumentClassifierBuildOperation200Response extends HttpResponse { + // (undocumented) + body: DocumentClassifierBuildOperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetDocumentClassifierBuildOperationDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetDocumentClassifierBuildOperationParameters = RequestParameters; + +// @public (undocumented) 
+export interface GetDocumentModelBuildOperation { + get(options?: GetDocumentModelBuildOperationParameters): StreamableMethod; + get(options?: GetDocumentModelComposeOperationParameters): StreamableMethod; + get(options?: GetDocumentModelCopyToOperationParameters): StreamableMethod; + get(options?: GetDocumentClassifierBuildOperationParameters): StreamableMethod; + get(options?: GetOperationParameters): StreamableMethod; +} + +// @public +export interface GetDocumentModelBuildOperation200Response extends HttpResponse { + // (undocumented) + body: DocumentModelBuildOperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetDocumentModelBuildOperationDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetDocumentModelBuildOperationParameters = RequestParameters; + +// @public +export interface GetDocumentModelComposeOperation200Response extends HttpResponse { + // (undocumented) + body: DocumentModelComposeOperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetDocumentModelComposeOperationDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetDocumentModelComposeOperationParameters = RequestParameters; + +// @public +export interface GetDocumentModelCopyToOperation200Response extends HttpResponse { + // (undocumented) + body: DocumentModelCopyToOperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetDocumentModelCopyToOperationDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetDocumentModelCopyToOperationParameters = RequestParameters; + +// 
@public +export function getLongRunningPoller(client: Client, initialResponse: BuildModel202Response | BuildModelDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export function getLongRunningPoller(client: Client, initialResponse: ComposeModel202Response | ComposeModelDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export function getLongRunningPoller(client: Client, initialResponse: CopyModelTo202Response | CopyModelToDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export function getLongRunningPoller(client: Client, initialResponse: BuildClassifier202Response | BuildClassifierDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export function getLongRunningPoller(client: Client, initialResponse: AnalyzeDocumentFromStream202Response | AnalyzeDocumentFromStreamDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export function getLongRunningPoller(client: Client, initialResponse: ClassifyDocumentFromStream202Response | ClassifyDocumentFromStreamDefaultResponse, options?: CreateHttpPollerOptions>): Promise, TResult>>; + +// @public (undocumented) +export interface GetModel { + delete(options?: DeleteModelParameters): StreamableMethod; + get(options?: GetModelParameters): StreamableMethod; +} + +// @public +export interface GetModel200Response extends HttpResponse { + // (undocumented) + body: DocumentModelDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetModelDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetModelParameters = RequestParameters; + +// @public +export interface GetOperation200Response extends HttpResponse { + 
// (undocumented) + body: OperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetOperationDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetOperationParameters = RequestParameters; + +// @public +export type GetPage = (pageLink: string, maxPageSize?: number) => Promise<{ + page: TPage; + nextPageLink?: string; +}>; + +// @public (undocumented) +export interface GetResourceInfo { + get(options?: GetResourceInfoParameters): StreamableMethod; +} + +// @public +export interface GetResourceInfo200Response extends HttpResponse { + // (undocumented) + body: ResourceDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface GetResourceInfoDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type GetResourceInfoParameters = RequestParameters; + +// @public +export interface InnerErrorOutput { + code?: string; + innererror?: InnerErrorOutput; + message?: string; +} + +// @public (undocumented) +export function isUnexpected(response: ListOperations200Response | ListOperationsDefaultResponse): response is ListOperationsDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetDocumentModelBuildOperation200Response | GetDocumentModelBuildOperationDefaultResponse): response is GetDocumentModelBuildOperationDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetResourceInfo200Response | GetResourceInfoDefaultResponse): response is GetResourceInfoDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetAnalyzeResult200Response | GetAnalyzeResultDefaultResponse): response is GetAnalyzeResultDefaultResponse; + +// @public (undocumented) +export function 
isUnexpected(response: AnalyzeDocumentFromStream202Response | AnalyzeDocumentFromStreamLogicalResponse | AnalyzeDocumentFromStreamDefaultResponse): response is AnalyzeDocumentFromStreamDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetModel200Response | GetModelDefaultResponse): response is GetModelDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: DeleteModel204Response | DeleteModelDefaultResponse): response is DeleteModelDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: BuildModel202Response | BuildModelLogicalResponse | BuildModelDefaultResponse): response is BuildModelDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: ComposeModel202Response | ComposeModelLogicalResponse | ComposeModelDefaultResponse): response is ComposeModelDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: AuthorizeModelCopy200Response | AuthorizeModelCopyDefaultResponse): response is AuthorizeModelCopyDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: CopyModelTo202Response | CopyModelToLogicalResponse | CopyModelToDefaultResponse): response is CopyModelToDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: ListModels200Response | ListModelsDefaultResponse): response is ListModelsDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: BuildClassifier202Response | BuildClassifierLogicalResponse | BuildClassifierDefaultResponse): response is BuildClassifierDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: ListClassifiers200Response | ListClassifiersDefaultResponse): response is ListClassifiersDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetClassifier200Response | GetClassifierDefaultResponse): response is GetClassifierDefaultResponse; + +// 
@public (undocumented) +export function isUnexpected(response: DeleteClassifier204Response | DeleteClassifierDefaultResponse): response is DeleteClassifierDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: ClassifyDocumentFromStream202Response | ClassifyDocumentFromStreamLogicalResponse | ClassifyDocumentFromStreamDefaultResponse): response is ClassifyDocumentFromStreamDefaultResponse; + +// @public (undocumented) +export function isUnexpected(response: GetClassifyResult200Response | GetClassifyResultDefaultResponse): response is GetClassifyResultDefaultResponse; + +// @public (undocumented) +export interface ListClassifiers { + get(options?: ListClassifiersParameters): StreamableMethod; +} + +// @public +export interface ListClassifiers200Response extends HttpResponse { + // (undocumented) + body: PagedDocumentClassifierDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface ListClassifiersDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type ListClassifiersParameters = RequestParameters; + +// @public (undocumented) +export interface ListModels { + get(options?: ListModelsParameters): StreamableMethod; +} + +// @public +export interface ListModels200Response extends HttpResponse { + // (undocumented) + body: PagedDocumentModelDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface ListModelsDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type ListModelsParameters = RequestParameters; + +// @public (undocumented) +export interface ListOperations { + get(options?: ListOperationsParameters): StreamableMethod; +} + +// @public +export interface ListOperations200Response extends HttpResponse { + // (undocumented) 
+ body: PagedOperationDetailsOutput; + // (undocumented) + status: "200"; +} + +// @public (undocumented) +export interface ListOperationsDefaultResponse extends HttpResponse { + // (undocumented) + body: ErrorResponseOutput; + // (undocumented) + status: string; +} + +// @public (undocumented) +export type ListOperationsParameters = RequestParameters; + +// @public +export type OperationDetailsOutput = DocumentModelBuildOperationDetailsOutput | DocumentModelComposeOperationDetailsOutput | DocumentModelCopyToOperationDetailsOutput | DocumentClassifierBuildOperationDetailsOutput; + +// @public +export interface OperationDetailsOutputParent { + apiVersion?: string; + createdDateTime: string; + error?: ErrorModelOutput; + // (undocumented) + kind: string; + lastUpdatedDateTime: string; + operationId: string; + percentCompleted?: number; + resourceLocation: string; + status: string; + tags?: Record; +} + +// @public +export type PagedDocumentClassifierDetailsOutput = Paged; + +// @public +export type PagedDocumentModelDetailsOutput = Paged; + +// @public +export type PagedOperationDetailsOutput = Paged; + +// @public +export function paginate(client: Client, initialResponse: TResponse, options?: PagingOptions): PagedAsyncIterableIterator>; + +// @public +export type PaginateReturn = TResult extends { + body: { + value?: infer TPage; + }; +} ? 
GetArrayType : Array; + +// @public +export interface PagingOptions { + customGetPage?: GetPage[]>; +} + +// @public +export interface QuotaDetailsOutput { + quota: number; + quotaResetDateTime: string; + used: number; +} + +// @public +export interface ResourceDetailsOutput { + customDocumentModels: CustomDocumentModelsDetailsOutput; + customNeuralDocumentModelBuilds: QuotaDetailsOutput; +} + +// @public (undocumented) +export interface Routes { + (path: "/operations"): ListOperations; + (path: "/operations/{operationId}", operationId: string): GetDocumentModelBuildOperation; + (path: "/info"): GetResourceInfo; + (path: "/documentModels/{modelId}/analyzeResults/{resultId}", modelId: string, resultId: string): GetAnalyzeResult; + (path: "/documentModels/{modelId}:analyze", modelId: string): AnalyzeDocumentFromStream; + (path: "/documentModels/{modelId}", modelId: string): GetModel; + (path: "/documentModels:build"): BuildModel; + (path: "/documentModels:compose"): ComposeModel; + (path: "/documentModels:authorizeCopy"): AuthorizeModelCopy; + (path: "/documentModels/{modelId}:copyTo", modelId: string): CopyModelTo; + (path: "/documentModels"): ListModels; + (path: "/documentClassifiers:build"): BuildClassifier; + (path: "/documentClassifiers"): ListClassifiers; + (path: "/documentClassifiers/{classifierId}", classifierId: string): GetClassifier; + (path: "/documentClassifiers/{classifierId}:analyze", classifierId: string): ClassifyDocumentFromStream; + (path: "/documentClassifiers/{classifierId}/analyzeResults/{resultId}", classifierId: string, resultId: string): GetClassifyResult; +} + +// (No @packageDocumentation comment for this package) + +``` diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/sample.env b/sdk/documentintelligence/ai-document-intelligence-rest/sample.env new file mode 100644 index 000000000000..4edbb15d213c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/sample.env @@ -0,0 +1,8 @@ +# Used in most 
samples. Retrieve these values from a Cognitive Services instance +# in the Azure Portal. +DOCUMENT_INTELLIGENCE_ENDPOINT="https://<resource-name>.cognitiveservices.azure.com/" +DOCUMENT_INTELLIGENCE_API_KEY="<api key>
" + +# Our tests assume that TEST_MODE is "playback" by default. You can +# change it to "record" to generate new recordings, or "live" to bypass the recorder entirely. +# TEST_MODE=playback diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeDocumentByModelId.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeDocumentByModelId.ts new file mode 100644 index 000000000000..898e02629d2c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeDocumentByModelId.ts @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to analyze a document using a model with a given ID. The model ID may refer to any model, + * whether custom, prebuilt, composed, etc. + * + * @summary analyze a document using a model by ID + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const modelId = process.env.DOCUMENT_INTELLIGENCE_CUSTOM_MODEL_ID || "";// "prebuilt-layout"; + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", modelId) + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const 
documents = analyzeResult?.documents; + + const document = documents && documents[0]; + if (!document) { + throw new Error("Expected at least one document in the result."); + } + + console.log( + "Extracted document:", + document.docType, + `(confidence: ${document.confidence || ""})` + ); + console.log("Fields:", document.fields); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeIdentityDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeIdentityDocument.ts new file mode 100644 index 000000000000..eed5ed114241 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeIdentityDocument.ts @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of an identity document (such as a driver license or passport) from a URL + * to a file using the prebuilt identity document model. + * + * The prebuilt identity document model can return several fields. 
For a detailed list of the fields supported by the + * identity document model, see the `IdentityDocument` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/iddocumentfieldschema + * + * @summary extract data from an identity document + * @azsdk-skip-javascript + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-idDocument") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a driver license image and extract data from it + urlSource: + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/identityDocument/license.png", + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), adds strong typing of the model's output + if (document) { + // The identity document model has multiple document types, so we need to know which document type was actually + // extracted. + if (document.docType === "idDocument.driverLicense") { + // For the sake of the example, we'll only show a few of the fields that are produced. 
+ console.log("Extracted a Driver License:"); + console.log(document.fields) + } else if (document.docType === "idDocument.passport") { + console.log("Extracted a Passport:"); + console.log(document.fields) + } else { + // The only reason this would happen is if the client library's schema for the prebuilt identity document model is + // out of date, and a new document type has been introduced. + console.error("Unknown document type in result:", document); + } + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeInvoice.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeInvoice.ts new file mode 100644 index 000000000000..566396705072 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeInvoice.ts @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of an invoice from a URL to a file using the prebuilt invoice model. + * + * The prebuilt invoice model can return several fields. 
For a detailed list of the fields supported by the invoice + * model, see the `Invoice` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/invoicefieldschema + * + * @summary extract data from an invoice document + * @azsdk-skip-javascript + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to an invoice image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/sample_invoice.jpg", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + if (!document) { + throw new Error("Expected at least one document in the result."); + } + + + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), adds strong typing of the model's output + if (document) { + console.log(document.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git 
a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceipt.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceipt.ts new file mode 100644 index 000000000000..2f62d481193e --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceipt.ts @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a receipt from a URL to a file using the prebuilt receipt model. + * + * The prebuilt receipt model can return several fields. For a detailed list of the fields supported by the receipt + * model, see the `Receipt` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/receiptfieldschema + * + * @summary extract data from a receipt document + * @azsdk-skip-javascript + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a receipt image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + 
+ const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + + + + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), as it adds strong typing of the model's output + if (document) { + console.log(document.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceiptByModelId.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceiptByModelId.ts new file mode 100644 index 000000000000..462f21bb5568 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeReceiptByModelId.ts @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a receipt from a URL to a file using the prebuilt receipt model. Rather + * than using the `PrebuiltModels.Receipt` document model, this sample shows the use of the prebuilt model by ID, + * resulting in a weaker type that exactly mirrors the model's field schema at runtime. + * + * The prebuilt receipt model can return several fields. 
For a detailed list of the fields supported by the + * receipt model, see the `Receipt` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/receiptfieldschema + * + * @summary use the "prebuilt-receipt" model ID to extract data from a receipt document (weakly-typed) + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a receipt image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + poller.onProgress((state) => console.log("Operation:", state.result, state.status)); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const result = documents && documents[0]; + if (result) { + console.log(result.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeW2TaxForm.ts 
b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeW2TaxForm.ts new file mode 100644 index 000000000000..fcc231737ede --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/analyzeW2TaxForm.ts @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a United States W2 tax form from a file using the prebuilt US W2 model. + * + * The prebuilt W2 model can return several fields. For a detailed list of the fields supported by the model, see the + * `TaxUsW2` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/taxusw2fieldschema + * + * @summary extract data from a United States W2 tax document + * @azsdk-skip-javascript + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; +import fs from "fs"; +import path from "path"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const filePath = fs.readFileSync(path.join(".", "assets", "w2", "w2-single.png")); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-tax.us.w2") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const document = documents?.[0]; + + if (document) { + 
console.log("Extracted W2 tax form:"); + console.log(document.fields); + } else { + throw new Error("Expected at least one document in the result."); + } +} + +main().catch((error) => { + console.error(error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildClassifier.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildClassifier.ts new file mode 100644 index 000000000000..d7656ce1bde4 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildClassifier.ts @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a custom classifier. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildclassifiermodel + * + * @summary build a classifier from a training data set + */ + +import DocumentIntelligence, { DocumentClassifierBuildOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_CLASSIFIER_ID || "") + random.substring(random.length - 6); + + const trainingDataSasUrl1 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_1 || ""; + + const trainingDataSasUrl2 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_2 || ""; + + const initialResponse = await 
client.path("/documentClassifiers:build").post({ + body: { + classifierId: modelId, + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: trainingDataSasUrl1, + }, + }, + bar: { + azureBlobSource: { + containerUrl: trainingDataSasUrl2, + }, + }, + }, + } + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const classifier = ( + (await (poller).pollUntilDone()).body as DocumentClassifierBuildOperationDetailsOutput + ).result; + if (!classifier) { + throw new Error("Expected a DocumentClassifierDetailsOutput response."); + } + + + console.log("Classifier ID:", classifier.classifierId); + console.log("Description:", classifier.description); + console.log("Created:", classifier.createdDateTime); + + console.log("Document Types:"); + for (const [docType, details] of Object.entries(classifier.docTypes)) { + console.log(`- Name: "${docType}", source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildModel.ts new file mode 100644 index 000000000000..d26a78e66a48 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/buildModel.ts @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a model with a single document type using a training data set. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. 
For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildtrainingset + * + * @summary build a model with a single document type from a training data set + */ + +import DocumentIntelligence, { DocumentModelBuildOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + const trainingDataSasUrl = + process.env.CUSTOM_MODEL_TRAINING_DATA_SAS_URL || ""; + + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId, + azureBlobSource: { + containerUrl: trainingDataSasUrl + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const model = ( + (await (await poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result; + if (!model) { + throw new Error("Expected a DocumentModelDetailsOutput response."); + } + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); + + // A model may contain several document types, which describe the possible object structures of fields extracted using + // this model + + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + model.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For 
simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/classifyDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/classifyDocument.ts new file mode 100644 index 000000000000..5fb8537c8f5b --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/classifyDocument.ts @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to use a custom classifier to get the document type (class) of a document. + * + * @summary use a custom classifier to classify a document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const documentUrl = + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/Invoice_1.pdf"; + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? 
""; + const initialResponse = await client + .path("/documentClassifiers/{classifierId}:analyze", classifierId) + .post({ + contentType: "application/json", + body: { + urlSource: documentUrl, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + if (analyzeResult?.documents === undefined || analyzeResult.documents.length === 0) { + throw new Error("Failed to extract any documents."); + } + + for (const document of analyzeResult.documents) { + console.log( + `Extracted a document with type '${document.docType}' on page ${document.boundingRegions?.[0].pageNumber} (confidence: ${document.confidence})` + ); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/composeModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/composeModel.ts new file mode 100644 index 000000000000..d16fca04a898 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/composeModel.ts @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * This sample demonstrates how to create a composed model from several individual labeled models. + * + * We build all of the component models used in the composition operation and then finally create the composed model. + * The resulting composed model will have all of the document types of its component submodels. 
When used for analysis, + * it will first classify the input as belonging to one of the document types. + * + * @summary create a composed model from several individual labeled models + * @azsdk-weight 60 + */ + +import DocumentIntelligence, { DocumentModelBuildOperationDetailsOutput, DocumentModelComposeOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + // This object will hold the SAS-encoded URLs to containers that hold + // different types of purchase order documents and their labels. + const purchaseOrderSasUrls = { + supplies: + process.env["PURCHASE_ORDER_SUPPLIES_SAS_URL"] || + "", + equipment: + process.env["PURCHASE_ORDER_EQUIPMENT_SAS_URL"] || + "", + furniture: + process.env["PURCHASE_ORDER_FURNITURE_SAS_URL"] || + "", + cleaningSupplies: + process.env["PURCHASE_ORDER_CLEANING_SUPPLIES_SAS_URL"] || + "", + }; + + // We'll put the last few digits of the current timestamp into the model IDs, just to make sure they're unique. 
+ const random = Date.now().toString(); + + const modelIds = await Promise.all( + Object.entries(purchaseOrderSasUrls) + .map(async ([kind, sasUrl]) => { + const modelId = kind + "ComponentModel" + random.substring(random.length - 6); + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: sasUrl, + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const model = ( + (await (poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result!; + + return model; + }) + .map(async (model) => { return { modelId: (await model).modelId } }) + ); + + // Finally, create the composed model. + + const composedModelId = "purchaseOrders" + random.substring(random.length - 6); + + const initialResponse = await client.path("/documentModels:compose").post({ + body: { + description: "A composed model that classifies purchase order documents and extracts data from them.", + componentModels: modelIds, + modelId: composedModelId, + + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + const composedModel = ( + (await (poller).pollUntilDone()).body as DocumentModelComposeOperationDetailsOutput + ).result!; + + + console.log("Model ID:", composedModel.modelId); + console.log("Description:", composedModel.description); + console.log("Created:", composedModel.createdDateTime); + + // The composed model should have a document type for each one of the individually built models that are composed into + // this larger model. 
+ + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + composedModel.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((err) => { + console.error("The sample encountered an error:", err); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/copyModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/copyModel.ts new file mode 100644 index 000000000000..c221333d6a8c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/copyModel.ts @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to copy a model from one resource to another. The model is created with a new model ID (and + * optionally a new description) in the destination resource, but will have the same document types as the source model. + * + * @summary copy a model from one resource to another + */ + +import DocumentIntelligence, { DocumentModelCopyToOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + + const random = Date.now().toString(); + const destinationModelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + + // The authorization must be created by the destination resource. 
+ // const destinationClient = new DocumentModelAdministrationClient(endpoint, credential); + const destinationClient = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + // const authorization = await destinationClient.getCopyAuthorization(destinationModelId); + const targetAuth = await destinationClient.path("/documentModels:authorizeCopy").post({ + body: { + modelId: destinationModelId, + }, + }); + if (isUnexpected(targetAuth)) { + throw targetAuth.body.error; + } + const sourceEndpoint = process.env.DOCUMENT_INTELLIGENCE_SOURCE_ENDPOINT || ""; + const sourceModelId = process.env.COPY_SOURCE_MODEL_ID || ""; + + // Then, the source resource can initiate the copy operation. + const sourceClient = DocumentIntelligence( + sourceEndpoint, + { key: process.env.DOCUMENT_INTELLIGENCE_SOURCE_API_KEY || "" }) + + const copyInitResponse = await sourceClient + .path("/documentModels/{modelId}:copyTo", sourceModelId) + .post({ + body: targetAuth.body, + }); + + if (isUnexpected(copyInitResponse)) { + throw copyInitResponse.body.error; + } + const copyPoller = getLongRunningPoller(sourceClient, copyInitResponse); + const model = ( + (await (await copyPoller).pollUntilDone()).body as DocumentModelCopyToOperationDetailsOutput + ).result!; + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/extractLayout.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/extractLayout.ts new file mode 100644 index 000000000000..b9d5118f2310 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/extractLayout.ts @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT license. + +/** + * This sample shows how to extract only the basic layout information from a document using the `beginExtractLayout` + * method. Layout information consists of the arrangement of basic OCR elements, such as pages (including their contents + * such as lines, words, and selection marks), tables, and text font styles. + * + * @summary use the prebuilt layout model to extract basic document elements only + * @azsdk-skip-javascript + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + } + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + + if (!pages || pages.length <= 0) { + console.log("No pages were extracted from the document."); + } else { + console.log("Pages:"); + for (const page of pages) { + console.log("- Page", page.pageNumber, `(unit: ${page.unit})`); + console.log(` ${page.width}x${page.height}, angle: ${page.angle}`); + console.log( + ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words` + ); + + if (page.lines && page.lines.length > 0) { + 
console.log(" Lines:"); + + for (const line of page.lines) { + console.log(` - "${line.content}"`); + } + } + } + } + + if (!tables || tables.length <= 0) { + console.log("No tables were extracted from the document."); + } else { + console.log("Tables:"); + for (const table of tables) { + console.log( + `- Extracted table: ${table.columnCount} columns, ${table.rowCount} rows (${table.cells.length} cells)` + ); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getClassifier.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getClassifier.ts new file mode 100644 index 000000000000..45fc7c3e1da9 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getClassifier.ts @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to get the details of a custom classifier by its ID, including information about the document + * types that the classifier supports. + * + * @summary get information about a classifier by its ID + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? 
""; + const classifier = await client.path("/documentClassifiers/{classifierId}", classifierId).get(); + + if (isUnexpected(classifier)) { + throw classifier.body.error; + } + console.log("ID", classifier.body.classifierId); + console.log("Created:", classifier.body.createdDateTime); + console.log("Description: ", classifier.body.description || ""); + + console.log("Document Types:"); + for (const [docType, details] of Object.entries(classifier.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log(`- Name "${docType}", source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getInfo.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getInfo.ts new file mode 100644 index 000000000000..414c3fe065ff --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getInfo.ts @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically retrieve metadata about the number of custom models in the Form Recognizer + * resource and the limit of custom models that the resource will allow to be created. 
+ * + * @summary get information about the count and limit of custom models in the resource + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const info = await client.path("/info").get(); + if (isUnexpected(info)) { + throw info.body.error; + } + console.log( + `Custom document models: ${info.body.customDocumentModels.count} of ${info.body.customDocumentModels.limit}` + ); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getModel.ts new file mode 100644 index 000000000000..a5b0d1a0903b --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/getModel.ts @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to get the full information about a custom model by its model ID, including information about + * the document types in the model and their field schemas. + * + * @summary get information about a model by its ID + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + // The model ID to query. This can be any model ID, not just a custom model, so for example + // the following sample uses `"prebuilt-idDocument"`, but you can change it to any model ID + // you'd like to inspect. 
+ const modelId = "prebuilt-idDocument"; + const model = await client.path("/documentModels/{modelId}", modelId).get(); + + if (isUnexpected(model)) { + throw model.body.error; + } + + console.log("ID", model.body.modelId); + console.log("Created:", model.body.createdDateTime); + console.log("Description: ", model.body.description || ""); + + console.log("Document Types:"); + for (const [docType, { fieldSchema }] of Object.entries(model.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log("-", docType, JSON.stringify(fieldSchema, undefined, 2)); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/listModels.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/listModels.ts new file mode 100644 index 000000000000..54ed144b4548 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/listModels.ts @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to iterate over the models in a resource. This will include both custom and prebuilt models. 
+ * + * @summary iterate over the models in a resource + */ + +import DocumentIntelligence, { isUnexpected, paginate } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + + const response = await client.path("/documentModels").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + + for await (const model of paginate(client, response)) { + console.log("- ID", model.modelId); + console.log(" Created:", model.createdDateTime); + console.log(" Description: ", model.description || ""); + + // The model summary does not include `docTypes`, so we must additionally call `getModel` to retrieve them + const detailedModel = (await client.path("/documentModels/{modelId}", model.modelId).get()); + + if (isUnexpected(detailedModel)) { + throw detailedModel.body.error; + } + const docTypes = detailedModel.body.docTypes; + + console.log(" Document Types:"); + for (const docType of Object.keys(docTypes || {})) { + console.log(" -", docType); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/readDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/readDocument.ts new file mode 100644 index 000000000000..a20e007c7940 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples-dev/readDocument.ts @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract the text content of a document using the "prebuilt-read" model. 
+ * + * @summary use the prebuilt "read" model to extract information about the text content of a document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-read") + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + }, + queryParameters: { features: ["barcodes"] }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + + // The "prebuilt-read" model (`beginReadDocument` method) only extracts information about the textual content of the + // document, such as page text elements and information about the language of the text. 
+ const pages = analyzeResult?.pages; + const languages = analyzeResult?.languages; + const styles = analyzeResult?.styles; + + + if (!pages || pages.length <= 0) { + console.log("No pages were extracted from the document."); + } else { + console.log("Pages:"); + for (const page of pages) { + console.log("- Page", page.pageNumber, `(unit: ${page.unit})`); + console.log(` ${page.width}x${page.height}, angle: ${page.angle}`); + console.log( + ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words` + ); + + if (page.lines && page.lines.length > 0) { + console.log(" Lines:"); + + for (const line of page.lines) { + console.log(` - "${line.content}"`); + } + } + } + } + + if (!languages || languages.length <= 0) { + console.log("No language spans were extracted from the document."); + } else { + console.log("Languages:"); + for (const languageEntry of languages) { + console.log( + `- Found language: ${languageEntry.locale} (confidence: ${languageEntry.confidence})` + ); + } + } + + if (!styles || styles.length <= 0) { + console.log("No text styles were extracted from the document."); + } else { + console.log("Styles:"); + for (const style of styles) { + console.log( + `- Handwritten: ${style.isHandwritten ? 
"yes" : "no"} (confidence=${style.confidence})` + ); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/README.md b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/README.md new file mode 100644 index 000000000000..de0cc2245893 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/README.md @@ -0,0 +1,71 @@ +# Azure Document Intelligence Rest Client client library samples for JavaScript (Beta) + +These sample programs show how to use the JavaScript client libraries for Azure Document Intelligence Rest Client in some common scenarios. + +| **File Name** | **Description** | +| ------------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| [composeModel.js][composemodel] | create a composed model from several individual labeled models | +| [analyzeDocumentByModelId.js][analyzedocumentbymodelid] | analyze a document using a model by ID | +| [analyzeReceiptByModelId.js][analyzereceiptbymodelid] | use the "prebuilt-receipt" model ID to extract data from a receipt document (weakly-typed) | +| [buildClassifier.js][buildclassifier] | build a classifier from a training data set | +| [buildModel.js][buildmodel] | build a model with a single document type from a training data set | +| [classifyDocument.js][classifydocument] | use a custom classifier to classify a document | +| [copyModel.js][copymodel] | copy a model from one resource to another | +| [getClassifier.js][getclassifier] | get information about a classifier by its ID | +| [getInfo.js][getinfo] | get information about the count and limit of custom models in the resource | +| [getModel.js][getmodel] | get information about a model by its ID | +| [listModels.js][listmodels] | iterate over the 
models in a resource | +| [readDocument.js][readdocument] | use the prebuilt "read" model to extract information about the text content of a document | + +## Prerequisites + +The sample programs are compatible with [LTS versions of Node.js](https://github.com/nodejs/release#release-schedule). + +You need [an Azure subscription][freesub] to run these sample programs. + +Samples retrieve credentials to access the service endpoint from environment variables. Alternatively, edit the source code to include the appropriate credentials. See each individual sample for details on which environment variables/credentials it requires to function. + +Adapting the samples to run in the browser may require some additional consideration. For details, please see the [package README][package]. + +## Setup + +To run the samples using the published version of the package: + +1. Install the dependencies using `npm`: + +```bash +npm install +``` + +2. Edit the file `sample.env`, adding the correct credentials to access the Azure service and run the samples. Then rename the file from `sample.env` to just `.env`. The sample programs will read this file automatically. + +3. Run whichever samples you like (note that some samples may require additional setup, see the table above): + +```bash +node composeModel.js +``` + +Alternatively, run a single sample with the correct environment variables set (setting up the `.env` file is not required if you do this), for example (cross-platform): + +```bash +npx cross-env DOCUMENT_INTELLIGENCE_ENDPOINT="" DOCUMENT_INTELLIGENCE_API_KEY="" PURCHASE_ORDER_SUPPLIES_SAS_URL="" PURCHASE_ORDER_EQUIPMENT_SAS_URL="" PURCHASE_ORDER_FURNITURE_SAS_URL="" PURCHASE_ORDER_CLEANING_SUPPLIES_SAS_URL="" node composeModel.js +``` + +## Next Steps + +Take a look at our samples for more information about the APIs that are available in the clients. 
+ +[composemodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/composeModel.js +[analyzedocumentbymodelid]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeDocumentByModelId.js +[analyzereceiptbymodelid]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeReceiptByModelId.js +[buildclassifier]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildClassifier.js +[buildmodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildModel.js +[classifydocument]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/classifyDocument.js +[copymodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/copyModel.js +[getclassifier]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getClassifier.js +[getinfo]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getInfo.js +[getmodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getModel.js +[listmodels]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/listModels.js +[readdocument]: 
https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/readDocument.js +[freesub]: https://azure.microsoft.com/free/ +[package]: https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest/README.md diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeDocumentByModelId.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeDocumentByModelId.js new file mode 100644 index 000000000000..036df2c0c66f --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeDocumentByModelId.js @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to analyze a document using a model with a given ID. The model ID may refer to any model, + * whether custom, prebuilt, composed, etc. 
+ * + * @summary analyze a document using a model by ID + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + const modelId = process.env.DOCUMENT_INTELLIGENCE_CUSTOM_MODEL_ID || ""; // "prebuilt-layout"; + + const initialResponse = await client.path("/documentModels/{modelId}:analyze", modelId).post({ + contentType: "application/json", + body: { + urlSource: + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + if (!document) { + throw new Error("Expected at least one document in the result."); + } + + console.log( + "Extracted document:", + document.docType, + `(confidence: ${document.confidence || ""})` + ); + console.log("Fields:", document.fields); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeReceiptByModelId.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeReceiptByModelId.js new file mode 100644 index 000000000000..36c8c9d9776d --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/analyzeReceiptByModelId.js @@ -0,0 
+1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a receipt from a URL to a file using the prebuilt receipt model. Rather + * than using the `PrebuiltModels.Receipt` document model, this sample shows the use of the prebuilt model by ID, + * resulting in a weaker type that exactly mirrors the model's field schema at runtime. + * + * The prebuilt receipt model can return several fields. For a detailed list of the fields supported by the + * receipt model, see the `Receipt` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/receiptfieldschema + * + * @summary use the "prebuilt-receipt" model ID to extract data from a receipt document (weakly-typed) + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a receipt image and extract data from it + urlSource: + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + poller.onProgress((state) => console.log("Operation:", state.result, state.status)); + const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult; + + const documents = 
analyzeResult?.documents; + + const result = documents && documents[0]; + if (result) { + console.log(result.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildClassifier.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildClassifier.js new file mode 100644 index 000000000000..647ab91732a4 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildClassifier.js @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a custom classifier. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. 
For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildclassifiermodel + * + * @summary build a classifier from a training data set + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_CLASSIFIER_ID || "") + random.substring(random.length - 6); + + const trainingDataSasUrl1 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_1 || ""; + + const trainingDataSasUrl2 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_2 || ""; + + const initialResponse = await client.path("/documentClassifiers:build").post({ + body: { + classifierId: modelId, + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: trainingDataSasUrl1, + }, + }, + bar: { + azureBlobSource: { + containerUrl: trainingDataSasUrl2, + }, + }, + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const classifier = (await poller.pollUntilDone()).body.result; + if (!classifier) { + throw new Error("Expected a DocumentClassifierDetailsOutput response."); + } + + console.log("Classifier ID:", classifier.classifierId); + console.log("Description:", classifier.description); + console.log("Created:", classifier.createdDateTime); + + console.log("Document Types:"); + for (const [docType, details] of Object.entries(classifier.docTypes)) { + console.log(`- Name: "${docType}", 
source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildModel.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildModel.js new file mode 100644 index 000000000000..8f3ae570c17c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/buildModel.js @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a model with a single document type using a training data set. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildtrainingset + * + * @summary build a model with a single document type from a training data set + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + const trainingDataSasUrl = + process.env.CUSTOM_MODEL_TRAINING_DATA_SAS_URL || ""; + + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId, + azureBlobSource: { + 
containerUrl: trainingDataSasUrl, + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const model = (await (await poller).pollUntilDone()).body.result; + if (!model) { + throw new Error("Expected a DocumentModelDetailsOutput response."); + } + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); + + // A model may contain several document types, which describe the possible object structures of fields extracted using + // this model + + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + model.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/classifyDocument.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/classifyDocument.js new file mode 100644 index 000000000000..9320ffcfef72 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/classifyDocument.js @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to use a custom classifier to get the document type (class) of a document. 
+ * + * @summary use a custom classifier to classify a document + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + const documentUrl = + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/Invoice_1.pdf"; + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? ""; + const initialResponse = await client + .path("/documentClassifiers/{classifierId}:analyze", classifierId) + .post({ + contentType: "application/json", + body: { + urlSource: documentUrl, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult; + + if (analyzeResult?.documents === undefined || analyzeResult.documents.length === 0) { + throw new Error("Failed to extract any documents."); + } + + for (const document of analyzeResult.documents) { + console.log( + `Extracted a document with type '${document.docType}' on page ${document.boundingRegions?.[0].pageNumber} (confidence: ${document.confidence})` + ); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/composeModel.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/composeModel.js new file mode 100644 index 000000000000..288f7215d3b8 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/composeModel.js @@ -0,0 +1,117 @@ +// 
Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * This sample demonstrates how to create a composed model from several individual labeled models. + * + * We build all of the component models used in the composition operation and then finally create the composed model. + * The resulting composed model will have all of the document types of its component submodels. When used for analysis, + * it will first classify the input as belonging to one of the document types. + * + * @summary create a composed model from several individual labeled models + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + // This object will hold the SAS-encoded URLs to containers that hold + // different types of purchase order documents and their labels. + const purchaseOrderSasUrls = { + supplies: + process.env["PURCHASE_ORDER_SUPPLIES_SAS_URL"] || + "", + equipment: + process.env["PURCHASE_ORDER_EQUIPMENT_SAS_URL"] || + "", + furniture: + process.env["PURCHASE_ORDER_FURNITURE_SAS_URL"] || + "", + cleaningSupplies: + process.env["PURCHASE_ORDER_CLEANING_SUPPLIES_SAS_URL"] || + "", + }; + + // We'll put the last few digits of the current timestamp into the model IDs, just to make sure they're unique.
+ const random = Date.now().toString(); + + const modelIds = await Promise.all( + Object.entries(purchaseOrderSasUrls) + .map(async ([kind, sasUrl]) => { + const modelId = kind + "ComponentModel" + random.substring(random.length - 6); + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: sasUrl, + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const model = (await poller.pollUntilDone()).body.result; + + return model; + }) + .map(async (model) => { + return { modelId: (await model).modelId }; + }) + ); + + // Finally, create the composed model. + + const composedModelId = "purchaseOrders" + random.substring(random.length - 6); + + const initialResponse = await client.path("/documentModels:compose").post({ + body: { + description: + "A composed model that classifies purchase order documents and extracts data from them.", + componentModels: modelIds, + modelId: composedModelId, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + const composedModel = (await poller.pollUntilDone()).body.result; + + console.log("Model ID:", composedModel.modelId); + console.log("Description:", composedModel.description); + console.log("Created:", composedModel.createdDateTime); + + // The composed model should have a document type for each one of the individually built models that are composed into + // this larger model. 
+ + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + composedModel.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((err) => { + console.error("The sample encountered an error:", err); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/copyModel.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/copyModel.js new file mode 100644 index 000000000000..8825ad830573 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/copyModel.js @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to copy a model from one resource to another. The model is created with a new model ID (and + * optionally a new description) in the destination resource, but will have the same document types as the source model. + * + * @summary copy a model from one resource to another + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const random = Date.now().toString(); + const destinationModelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + + // The authorization must be created by the destination resource. 
+ // const destinationClient = new DocumentModelAdministrationClient(endpoint, credential); + const destinationClient = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + // const authorization = await destinationClient.getCopyAuthorization(destinationModelId); + const targetAuth = await destinationClient.path("/documentModels:authorizeCopy").post({ + body: { + modelId: destinationModelId, + }, + }); + if (isUnexpected(targetAuth)) { + throw targetAuth.body.error; + } + const sourceEndpoint = process.env.DOCUMENT_INTELLIGENCE_SOURCE_ENDPOINT || ""; + const sourceModelId = process.env.COPY_SOURCE_MODEL_ID || ""; + + // Then, the source resource can initiate the copy operation. + const sourceClient = DocumentIntelligence(sourceEndpoint, { + key: process.env.DOCUMENT_INTELLIGENCE_SOURCE_API_KEY || "", + }); + + const copyInitResponse = await sourceClient + .path("/documentModels/{modelId}:copyTo", sourceModelId) + .post({ + body: targetAuth.body, + }); + + if (isUnexpected(copyInitResponse)) { + throw copyInitResponse.body.error; + } + const copyPoller = getLongRunningPoller(sourceClient, copyInitResponse); + const model = (await (await copyPoller).pollUntilDone()).body.result; + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getClassifier.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getClassifier.js new file mode 100644 index 000000000000..34b9bb4a9cd0 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getClassifier.js @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT license. + +/** + * This sample shows how to get the details of a custom classifier by its ID, including information about the document + * types that the classifier supports. + * + * @summary get information about a classifier by its ID + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? ""; + const classifier = await client.path("/documentClassifiers/{classifierId}", classifierId).get(); + + if (isUnexpected(classifier)) { + throw classifier.body.error; + } + console.log("ID", classifier.body.classifierId); + console.log("Created:", classifier.body.createdDateTime); + console.log("Description: ", classifier.body.description || ""); + + console.log("Document Types:"); + for (const [docType, details] of Object.entries(classifier.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log(`- Name "${docType}", source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getInfo.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getInfo.js new file mode 100644 index 000000000000..0041143d28bd --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getInfo.js @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +/** + * This sample shows how to programmatically retrieve metadata about the number of custom models in the Form Recognizer + * resource and the limit of custom models that the resource will allow to be created. + * + * @summary get information about the count and limit of custom models in the resource + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + const info = await client.path("/info").get(); + if (isUnexpected(info)) { + throw info.body.error; + } + console.log( + `Custom document models: ${info.body.customDocumentModels.count} of ${info.body.customDocumentModels.limit}` + ); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getModel.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getModel.js new file mode 100644 index 000000000000..f56a54bbcbcd --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/getModel.js @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to get the full information about a custom model by its model ID, including information about + * the document types in the model and their field schemas. 
+ * + * @summary get information about a model by its ID + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + // The model ID to query. This can be any model ID, not just a custom model, so for example + // the following sample uses `"prebuilt-idDocument"`, but you can change it to any model ID + // you'd like to inspect. + const modelId = "prebuilt-idDocument"; + const model = await client.path("/documentModels/{modelId}", modelId).get(); + + if (isUnexpected(model)) { + throw model.body.error; + } + + console.log("ID", model.body.modelId); + console.log("Created:", model.body.createdDateTime); + console.log("Description: ", model.body.description || ""); + + console.log("Document Types:"); + for (const [docType, { fieldSchema }] of Object.entries(model.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log("-", docType, JSON.stringify(fieldSchema, undefined, 2)); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/listModels.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/listModels.js new file mode 100644 index 000000000000..d9e7034a4498 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/listModels.js @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to iterate over the models in a resource. This will include both custom and prebuilt models. 
+ * + * @summary iterate over the models in a resource + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { isUnexpected, paginate } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + const response = await client.path("/documentModels").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + + for await (const model of paginate(client, response)) { + console.log("- ID", model.modelId); + console.log(" Created:", model.createdDateTime); + console.log(" Description: ", model.description || ""); + + // The model summary does not include `docTypes`, so we must additionally call `getModel` to retrieve them + const detailedModel = await client.path("/documentModels/{modelId}", model.modelId).get(); + + if (isUnexpected(detailedModel)) { + throw detailedModel.body.error; + } + const docTypes = detailedModel.body.docTypes; + + console.log(" Document Types:"); + for (const docType of Object.keys(docTypes || {})) { + console.log(" -", docType); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/package.json b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/package.json new file mode 100644 index 000000000000..c89c769c0df8 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/package.json @@ -0,0 +1,32 @@ +{ + "name": "@azure-samples/ai-document-intelligence-js-beta", + "private": true, + "version": "1.0.0", + "description": "Azure Document Intelligence Rest Client client library samples for JavaScript (Beta)", + "engines": { + "node": ">=18.0.0" + }, + 
"repository": { + "type": "git", + "url": "git+https://github.com/Azure/azure-sdk-for-js.git", + "directory": "sdk/documentintelligence/ai-document-intelligence-rest" + }, + "keywords": [ + "node", + "azure", + "cloud", + "typescript", + "browser", + "isomorphic" + ], + "author": "Microsoft Corporation", + "license": "MIT", + "bugs": { + "url": "https://github.com/Azure/azure-sdk-for-js/issues" + }, + "homepage": "https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest", + "dependencies": { + "@azure-rest/ai-document-intelligence": "next", + "dotenv": "latest" + } +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/readDocument.js b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/readDocument.js new file mode 100644 index 000000000000..23143626b22d --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/readDocument.js @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract the text content of a document using the "prebuilt-read" model. 
+ * + * @summary use the prebuilt "read" model to extract information about the text content of a document + */ + +const DocumentIntelligence = require("@azure-rest/ai-document-intelligence").default, + { getLongRunningPoller, isUnexpected } = require("@azure-rest/ai-document-intelligence"); + +require("dotenv").config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" } + ); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-read") + .post({ + contentType: "application/json", + body: { + urlSource: + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + }, + queryParameters: { features: ["barcodes"] }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult; + + // The "prebuilt-read" model (`beginReadDocument` method) only extracts information about the textual content of the + // document, such as page text elements and information about the language of the text. 
+ const pages = analyzeResult?.pages; + const languages = analyzeResult?.languages; + const styles = analyzeResult?.styles; + + if (!pages || pages.length <= 0) { + console.log("No pages were extracted from the document."); + } else { + console.log("Pages:"); + for (const page of pages) { + console.log("- Page", page.pageNumber, `(unit: ${page.unit})`); + console.log(` ${page.width}x${page.height}, angle: ${page.angle}`); + console.log( + ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words` + ); + + if (page.lines && page.lines.length > 0) { + console.log(" Lines:"); + + for (const line of page.lines) { + console.log(` - "${line.content}"`); + } + } + } + } + + if (!languages || languages.length <= 0) { + console.log("No language spans were extracted from the document."); + } else { + console.log("Languages:"); + for (const languageEntry of languages) { + console.log( + `- Found language: ${languageEntry.locale} (confidence: ${languageEntry.confidence})` + ); + } + } + + if (!styles || styles.length <= 0) { + console.log("No text styles were extracted from the document."); + } else { + console.log("Styles:"); + for (const style of styles) { + console.log( + `- Handwritten: ${style.isHandwritten ? "yes" : "no"} (confidence=${style.confidence})` + ); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/sample.env b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/sample.env new file mode 100644 index 000000000000..4edbb15d213c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/javascript/sample.env @@ -0,0 +1,8 @@ +# Used in most samples. Retrieve these values from a Cognitive Services instance +# in the Azure Portal. 
+DOCUMENT_INTELLIGENCE_ENDPOINT="https://.cognitiveservies.azure.com/" +DOCUMENT_INTELLIGENCE_API_KEY="" + +# Our tests assume that TEST_MODE is "playback" by default. You can +# change it to "record" to generate new recordings, or "live" to bypass the recorder entirely. +# TEST_MODE=playback diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/README.md b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/README.md new file mode 100644 index 000000000000..a18de8ac7f38 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/README.md @@ -0,0 +1,94 @@ +# Azure Document Intelligence Rest Client client library samples for TypeScript (Beta) + +These sample programs show how to use the TypeScript client libraries for Azure Document Intelligence Rest Client in some common scenarios. + +| **File Name** | **Description** | +| ------------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| [composeModel.ts][composemodel] | create a composed model from several individual labeled models | +| [analyzeDocumentByModelId.ts][analyzedocumentbymodelid] | analyze a document using a model by ID | +| [analyzeIdentityDocument.ts][analyzeidentitydocument] | extract data from an identity document | +| [analyzeInvoice.ts][analyzeinvoice] | extract data from an invoice document | +| [analyzeReceipt.ts][analyzereceipt] | extract data from a receipt document | +| [analyzeReceiptByModelId.ts][analyzereceiptbymodelid] | use the "prebuilt-receipt" model ID to extract data from a receipt document (weakly-typed) | +| [analyzeW2TaxForm.ts][analyzew2taxform] | extract data from a United States W2 tax document | +| [buildClassifier.ts][buildclassifier] | build a classifier from a training data set | +| [buildModel.ts][buildmodel] | build a model with a single document type from a training data 
set | +| [classifyDocument.ts][classifydocument] | use a custom classifier to classify a document | +| [copyModel.ts][copymodel] | copy a model from one resource to another | +| [extractLayout.ts][extractlayout] | use the prebuilt layout model to extract basic document elements only | +| [getClassifier.ts][getclassifier] | get information about a classifier by its ID | +| [getInfo.ts][getinfo] | get information about the count and limit of custom models in the resource | +| [getModel.ts][getmodel] | get information about a model by its ID | +| [listModels.ts][listmodels] | iterate over the models in a resource | +| [readDocument.ts][readdocument] | use the prebuilt "read" model to extract information about the text content of a document | + +## Prerequisites + +The sample programs are compatible with [LTS versions of Node.js](https://github.com/nodejs/release#release-schedule). + +Before running the samples in Node, they must be compiled to JavaScript using the TypeScript compiler. For more information on TypeScript, see the [TypeScript documentation][typescript]. Install the TypeScript compiler using: + +```bash +npm install -g typescript +``` + +You need [an Azure subscription][freesub] to run these sample programs. + +Samples retrieve credentials to access the service endpoint from environment variables. Alternatively, edit the source code to include the appropriate credentials. See each individual sample for details on which environment variables/credentials it requires to function. + +Adapting the samples to run in the browser may require some additional consideration. For details, please see the [package README][package]. + +## Setup + +To run the samples using the published version of the package: + +1. Install the dependencies using `npm`: + +```bash +npm install +``` + +2. Compile the samples: + +```bash +npm run build +``` + +3. Edit the file `sample.env`, adding the correct credentials to access the Azure service and run the samples. 
Then rename the file from `sample.env` to just `.env`. The sample programs will read this file automatically. + +4. Run whichever samples you like (note that some samples may require additional setup, see the table above): + +```bash +node dist/composeModel.js +``` + +Alternatively, run a single sample with the correct environment variables set (setting up the `.env` file is not required if you do this), for example (cross-platform): + +```bash +npx cross-env DOCUMENT_INTELLIGENCE_ENDPOINT="" DOCUMENT_INTELLIGENCE_API_KEY="" PURCHASE_ORDER_SUPPLIES_SAS_URL="" PURCHASE_ORDER_EQUIPMENT_SAS_URL="" PURCHASE_ORDER_FURNITURE_SAS_URL="" PURCHASE_ORDER_CLEANING_SUPPLIES_SAS_URL="" node dist/composeModel.js +``` + +## Next Steps + +Take a look at our samples for more information about the APIs that are available in the clients. + +[composemodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/composeModel.ts +[analyzedocumentbymodelid]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeDocumentByModelId.ts +[analyzeidentitydocument]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeIdentityDocument.ts +[analyzeinvoice]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeInvoice.ts +[analyzereceipt]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceipt.ts +[analyzereceiptbymodelid]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceiptByModelId.ts 
+[analyzew2taxform]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeW2TaxForm.ts +[buildclassifier]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildClassifier.ts +[buildmodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildModel.ts +[classifydocument]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/classifyDocument.ts +[copymodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/copyModel.ts +[extractlayout]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/extractLayout.ts +[getclassifier]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getClassifier.ts +[getinfo]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getInfo.ts +[getmodel]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getModel.ts +[listmodels]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/listModels.ts +[readdocument]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/readDocument.ts +[freesub]: 
https://azure.microsoft.com/free/ +[package]: https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest/README.md +[typescript]: https://www.typescriptlang.org/docs/home.html diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/package.json b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/package.json new file mode 100644 index 000000000000..b995f39180bd --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/package.json @@ -0,0 +1,41 @@ +{ + "name": "@azure-samples/ai-document-intelligence-ts-beta", + "private": true, + "version": "1.0.0", + "description": "Azure Document Intelligence Rest Client client library samples for TypeScript (Beta)", + "engines": { + "node": ">=18.0.0" + }, + "scripts": { + "build": "tsc", + "prebuild": "rimraf dist/" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/Azure/azure-sdk-for-js.git", + "directory": "sdk/documentintelligence/ai-document-intelligence-rest" + }, + "keywords": [ + "node", + "azure", + "cloud", + "typescript", + "browser", + "isomorphic" + ], + "author": "Microsoft Corporation", + "license": "MIT", + "bugs": { + "url": "https://github.com/Azure/azure-sdk-for-js/issues" + }, + "homepage": "https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/documentintelligence/ai-document-intelligence-rest", + "dependencies": { + "@azure-rest/ai-document-intelligence": "next", + "dotenv": "latest" + }, + "devDependencies": { + "@types/node": "^18.0.0", + "typescript": "~5.2.0", + "rimraf": "latest" + } +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/sample.env b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/sample.env new file mode 100644 index 000000000000..4edbb15d213c --- /dev/null +++ 
b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/sample.env @@ -0,0 +1,8 @@ +# Used in most samples. Retrieve these values from a Cognitive Services instance +# in the Azure Portal. +DOCUMENT_INTELLIGENCE_ENDPOINT="https://<resource-name>.cognitiveservices.azure.com/" +DOCUMENT_INTELLIGENCE_API_KEY="" + +# Our tests assume that TEST_MODE is "playback" by default. You can +# change it to "record" to generate new recordings, or "live" to bypass the recorder entirely. +# TEST_MODE=playback diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeDocumentByModelId.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeDocumentByModelId.ts new file mode 100644 index 000000000000..898e02629d2c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeDocumentByModelId.ts @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to analyze a document using a model with a given ID. The model ID may refer to any model, + * whether custom, prebuilt, composed, etc. 
+ * + * @summary analyze a document using a model by ID + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const modelId = process.env.DOCUMENT_INTELLIGENCE_CUSTOM_MODEL_ID || "";// "prebuilt-layout"; + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", modelId) + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + if (!document) { + throw new Error("Expected at least one document in the result."); + } + + console.log( + "Extracted document:", + document.docType, + `(confidence: ${document.confidence || ""})` + ); + console.log("Fields:", document.fields); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeIdentityDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeIdentityDocument.ts new file mode 100644 index 000000000000..2e9742fb1ba2 --- /dev/null +++ 
b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeIdentityDocument.ts @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of an identity document (such as a driver license or passport) from a URL + * to a file using the prebuilt identity document model. + * + * The prebuilt identity document model can return several fields. For a detailed list of the fields supported by the + * identity document model, see the `IdentityDocument` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/iddocumentfieldschema + * + * @summary extract data from an identity document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-idDocument") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a driver license image and extract data from it + urlSource: + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/identityDocument/license.png", + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; 
+ + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), adds strong typing of the model's output + if (document) { + // The identity document model has multiple document types, so we need to know which document type was actually + // extracted. + if (document.docType === "idDocument.driverLicense") { + // For the sake of the example, we'll only show a few of the fields that are produced. + console.log("Extracted a Driver License:"); + console.log(document.fields) + } else if (document.docType === "idDocument.passport") { + console.log("Extracted a Passport:"); + console.log(document.fields) + } else { + // The only reason this would happen is if the client library's schema for the prebuilt identity document model is + // out of date, and a new document type has been introduced. + console.error("Unknown document type in result:", document); + } + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeInvoice.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeInvoice.ts new file mode 100644 index 000000000000..5fa9750679bc --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeInvoice.ts @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of an invoice from a URL to a file using the prebuilt invoice model. + * + * The prebuilt invoice model can return several fields. 
For a detailed list of the fields supported by the invoice + * model, see the `Invoice` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/invoicefieldschema + * + * @summary extract data from an invoice document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to an invoice image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/sample_invoice.jpg", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + if (!document) { + throw new Error("Expected at least one document in the result."); + } + + + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), adds strong typing of the model's output + if (document) { + console.log(document.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git 
a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceipt.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceipt.ts new file mode 100644 index 000000000000..4e35cb6653dc --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceipt.ts @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a receipt from a URL to a file using the prebuilt receipt model. + * + * The prebuilt receipt model can return several fields. For a detailed list of the fields supported by the receipt + * model, see the `Receipt` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/receiptfieldschema + * + * @summary extract data from a receipt document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a receipt image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as 
AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const document = documents && documents[0]; + + + + // Use of PrebuiltModels.Receipt above (rather than the raw model ID), as it adds strong typing of the model's output + if (document) { + console.log(document.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceiptByModelId.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceiptByModelId.ts new file mode 100644 index 000000000000..462f21bb5568 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeReceiptByModelId.ts @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a receipt from a URL to a file using the prebuilt receipt model. Rather + * than using the `PrebuiltModels.Receipt` document model, this sample shows the use of the prebuilt model by ID, + * resulting in a weaker type that exactly mirrors the model's field schema at runtime. + * + * The prebuilt receipt model can return several fields. 
For a detailed list of the fields supported by the + * receipt model, see the `Receipt` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/receiptfieldschema + * + * @summary use the "prebuilt-receipt" model ID to extract data from a receipt document (weakly-typed) + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + // The Document Intelligence service will access the following URL to a receipt image and extract data from it + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/receipt/contoso-receipt.png", + } + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + poller.onProgress((state) => console.log("Operation:", state.result, state.status)); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + const result = documents && documents[0]; + if (result) { + console.log(result.fields); + } else { + throw new Error("Expected at least one receipt in the result."); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeW2TaxForm.ts 
b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeW2TaxForm.ts new file mode 100644 index 000000000000..77ff44bcb250 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/analyzeW2TaxForm.ts @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract elements of a United States W2 tax form from a file using the prebuilt US W2 model. + * + * The prebuilt W2 model can return several fields. For a detailed list of the fields supported by the model, see the + * `TaxUsW2` type in the documentation, or refer to the following link: + * + * https://aka.ms/azsdk/documentitelligence/taxusw2fieldschema + * + * @summary extract data from a United States W2 tax document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; +import fs from "fs"; +import path from "path"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const filePath = path.join(".", "assets", "w2", "w2-single.png"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-tax.us.w2") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const document = documents?.[0]; + + if 
(document) { + console.log("Extracted W2 tax form:"); + console.log(document.fields); + } else { + throw new Error("Expected at least one document in the result."); + } +} + +main().catch((error) => { + console.error(error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildClassifier.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildClassifier.ts new file mode 100644 index 000000000000..d7656ce1bde4 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildClassifier.ts @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a custom classifier. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. 
For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildclassifiermodel + * + * @summary build a classifier from a training data set + */ + +import DocumentIntelligence, { DocumentClassifierBuildOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_CLASSIFIER_ID || "") + random.substring(random.length - 6); + + const trainingDataSasUrl1 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_1 || ""; + + const trainingDataSasUrl2 = + process.env.CUSTOM_CLASSIFIER_TRAINING_DATA_SAS_URL_2 || ""; + + const initialResponse = await client.path("/documentClassifiers:build").post({ + body: { + classifierId: modelId, + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: trainingDataSasUrl1, + }, + }, + bar: { + azureBlobSource: { + containerUrl: trainingDataSasUrl2, + }, + }, + }, + } + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const classifier = ( + (await (poller).pollUntilDone()).body as DocumentClassifierBuildOperationDetailsOutput + ).result; + if (!classifier) { + throw new Error("Expected a DocumentClassifierDetailsOutput response."); + } + + + console.log("Classifier ID:", classifier.classifierId); + console.log("Description:", classifier.description); + console.log("Created:", classifier.createdDateTime); + + console.log("Document Types:"); + for (const [docType, details] of 
Object.entries(classifier.docTypes)) { + console.log(`- Name: "${docType}", source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildModel.ts new file mode 100644 index 000000000000..d26a78e66a48 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/buildModel.ts @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically build a model with a single document type using a training data set. + * + * The Document Intelligence service expects the training data to be organized and labeled according to a particular + * convention and stored in an Azure Storage container. 
For more information about creating a training data set, please + * see the information at the following link to the service's documentation: + * + * https://aka.ms/azsdk/documentitelligence/buildtrainingset + * + * @summary build a model with a single document type from a training data set + */ + +import DocumentIntelligence, { DocumentModelBuildOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const random = Date.now().toString(); + const modelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + const trainingDataSasUrl = + process.env.CUSTOM_MODEL_TRAINING_DATA_SAS_URL || ""; + + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId, + azureBlobSource: { + containerUrl: trainingDataSasUrl + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const model = ( + (await (await poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result; + if (!model) { + throw new Error("Expected a DocumentModelDetailsOutput response."); + } + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); + + // A model may contain several document types, which describe the possible object structures of fields extracted using + // this model + + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + model.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For 
simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/classifyDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/classifyDocument.ts new file mode 100644 index 000000000000..5fb8537c8f5b --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/classifyDocument.ts @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to use a custom classifier to get the document type (class) of a document. + * + * @summary use a custom classifier to classify a document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const documentUrl = + "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/Invoice_1.pdf"; + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? 
""; + const initialResponse = await client + .path("/documentClassifiers/{classifierId}:analyze", classifierId) + .post({ + contentType: "application/json", + body: { + urlSource: documentUrl, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + if (analyzeResult?.documents === undefined || analyzeResult.documents.length === 0) { + throw new Error("Failed to extract any documents."); + } + + for (const document of analyzeResult.documents) { + console.log( + `Extracted a document with type '${document.docType}' on page ${document.boundingRegions?.[0].pageNumber} (confidence: ${document.confidence})` + ); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/composeModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/composeModel.ts new file mode 100644 index 000000000000..5e1044f9cf49 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/composeModel.ts @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * This sample demonstrates how to create a composed model from several individual labeled models. + * + * We build all of the component models used in the composition operation and then finally create the composed model. + * The resulting composed model will have all of the document types of its component submodels. 
When used for analysis, + * it will first classify the input as belonging to one of the document types. + * + * @summary create a composed model from several individual labeled models + */ + +import DocumentIntelligence, { DocumentModelBuildOperationDetailsOutput, DocumentModelComposeOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + // This object will hold the SAS-encoded URLs to containers that hold + // different types of purchase order documents and their labels. + const purchaseOrderSasUrls = { + supplies: + process.env["PURCHASE_ORDER_SUPPLIES_SAS_URL"] || + "", + equipment: + process.env["PURCHASE_ORDER_EQUIPMENT_SAS_URL"] || + "", + furniture: + process.env["PURCHASE_ORDER_FURNITURE_SAS_URL"] || + "", + cleaningSupplies: + process.env["PURCHASE_ORDER_CLEANING_SUPPLIES_SAS_URL"] || + "", + }; + + // We'll put the last few digits of the current timestamp into the model IDs, just to make sure they're unique. 
+ const random = Date.now().toString(); + + const modelIds = await Promise.all( + Object.entries(purchaseOrderSasUrls) + .map(async ([kind, sasUrl]) => { + const modelId = kind + "ComponentModel" + random.substring(random.length - 6); + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: sasUrl, + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const model = ( + (await (poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result!; + + return model; + }) + .map(async (model) => { return { modelId: (await model).modelId } }) + ); + + // Finally, create the composed model. + + const composedModelId = "purchaseOrders" + random.substring(random.length - 6); + + const initialResponse = await client.path("/documentModels:compose").post({ + body: { + description: "A composed model that classifies purchase order documents and extracts data from them.", + componentModels: modelIds, + modelId: composedModelId, + + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + + const composedModel = ( + (await (poller).pollUntilDone()).body as DocumentModelComposeOperationDetailsOutput + ).result!; + + + console.log("Model ID:", composedModel.modelId); + console.log("Description:", composedModel.description); + console.log("Created:", composedModel.createdDateTime); + + // The composed model should have a document type for each one of the individually built models that are composed into + // this larger model. 
+ + console.log("Document Types:"); + for (const [docType, { description, fieldSchema: schema }] of Object.entries( + composedModel.docTypes || {} + )) { + console.log(`- Name: "${docType}"`); + console.log(` Description: "${description}"`); + + // For simplicity, this example will only show top-level field names + console.log(" Fields:"); + + for (const [fieldName, fieldSchema] of Object.entries(schema)) { + console.log(` - "${fieldName}" (${fieldSchema.type})`); + console.log(` ${fieldSchema.description || ""}`); + } + } +} + +main().catch((err) => { + console.error("The sample encountered an error:", err); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/copyModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/copyModel.ts new file mode 100644 index 000000000000..c221333d6a8c --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/copyModel.ts @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to copy a model from one resource to another. The model is created with a new model ID (and + * optionally a new description) in the destination resource, but will have the same document types as the source model. + * + * @summary copy a model from one resource to another + */ + +import DocumentIntelligence, { DocumentModelCopyToOperationDetailsOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + + const random = Date.now().toString(); + const destinationModelId = + (process.env.CUSTOM_MODEL_ID || "") + random.substring(random.length - 6); + + // The authorization must be created by the destination resource. 
+ // const destinationClient = new DocumentModelAdministrationClient(endpoint, credential); + const destinationClient = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + // const authorization = await destinationClient.getCopyAuthorization(destinationModelId); + const targetAuth = await destinationClient.path("/documentModels:authorizeCopy").post({ + body: { + modelId: destinationModelId, + }, + }); + if (isUnexpected(targetAuth)) { + throw targetAuth.body.error; + } + const sourceEndpoint = process.env.DOCUMENT_INTELLIGENCE_SOURCE_ENDPOINT || ""; + const sourceModelId = process.env.COPY_SOURCE_MODEL_ID || ""; + + // Then, the source resource can initiate the copy operation. + const sourceClient = DocumentIntelligence( + sourceEndpoint, + { key: process.env.DOCUMENT_INTELLIGENCE_SOURCE_API_KEY || "" }) + + const copyInitResponse = await sourceClient + .path("/documentModels/{modelId}:copyTo", sourceModelId) + .post({ + body: targetAuth.body, + }); + + if (isUnexpected(copyInitResponse)) { + throw copyInitResponse.body.error; + } + const copyPoller = getLongRunningPoller(sourceClient, copyInitResponse); + const model = ( + (await (await copyPoller).pollUntilDone()).body as DocumentModelCopyToOperationDetailsOutput + ).result!; + + console.log("Model ID:", model.modelId); + console.log("Description:", model.description); + console.log("Created:", model.createdDateTime); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/extractLayout.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/extractLayout.ts new file mode 100644 index 000000000000..fb5270bf5b9f --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/extractLayout.ts @@ -0,0 +1,80 @@ 
+// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract only the basic layout information from a document using the `beginExtractLayout` + * method. Layout information consists of the arrangement of basic OCR elements, such as pages (including their contents + * such as lines, words, and selection marks), tables, and text font styles. + * + * @summary use the prebuilt layout model to extract basic document elements only + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + } + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + + if (!pages || pages.length <= 0) { + console.log("No pages were extracted from the document."); + } else { + console.log("Pages:"); + for (const page of pages) { + console.log("- Page", page.pageNumber, `(unit: ${page.unit})`); + console.log(` ${page.width}x${page.height}, angle: ${page.angle}`); + console.log( + ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words` + ); + + if (page.lines && page.lines.length > 0) 
{ + console.log(" Lines:"); + + for (const line of page.lines) { + console.log(` - "${line.content}"`); + } + } + } + } + + if (!tables || tables.length <= 0) { + console.log("No tables were extracted from the document."); + } else { + console.log("Tables:"); + for (const table of tables) { + console.log( + `- Extracted table: ${table.columnCount} columns, ${table.rowCount} rows (${table.cells.length} cells)` + ); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getClassifier.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getClassifier.ts new file mode 100644 index 000000000000..45fc7c3e1da9 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getClassifier.ts @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to get the details of a custom classifier by its ID, including information about the document + * types that the classifier supports. + * + * @summary get information about a classifier by its ID + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const classifierId = process.env.CUSTOM_CLASSIFIER_ID ?? 
""; + const classifier = await client.path("/documentClassifiers/{classifierId}", classifierId).get(); + + if (isUnexpected(classifier)) { + throw classifier.body.error; + } + console.log("ID", classifier.body.classifierId); + console.log("Created:", classifier.body.createdDateTime); + console.log("Description: ", classifier.body.description || ""); + + console.log("Document Types:"); + for (const [docType, details] of Object.entries(classifier.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log(`- Name "${docType}", source: ${JSON.stringify(details, null, 2)}`); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getInfo.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getInfo.ts new file mode 100644 index 000000000000..414c3fe065ff --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getInfo.ts @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to programmatically retrieve metadata about the number of custom models in the Form Recognizer + * resource and the limit of custom models that the resource will allow to be created. 
+ * + * @summary get information about the count and limit of custom models in the resource + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + const info = await client.path("/info").get(); + if (isUnexpected(info)) { + throw info.body.error; + } + console.log( + `Custom document models: ${info.body.customDocumentModels.count} of ${info.body.customDocumentModels.limit}` + ); +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getModel.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getModel.ts new file mode 100644 index 000000000000..a5b0d1a0903b --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/getModel.ts @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to get the full information about a custom model by its model ID, including information about + * the document types in the model and their field schemas. + * + * @summary get information about a model by its ID + */ + +import DocumentIntelligence, { isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + // The model ID to query. 
This can be any model ID, not just a custom model, so for example + // the following sample uses `"prebuilt-idDocument"`, but you can change it to any model ID + // you'd like to inspect. + const modelId = "prebuilt-idDocument"; + const model = await client.path("/documentModels/{modelId}", modelId).get(); + + if (isUnexpected(model)) { + throw model.body.error; + } + + console.log("ID", model.body.modelId); + console.log("Created:", model.body.createdDateTime); + console.log("Description: ", model.body.description || ""); + + console.log("Document Types:"); + for (const [docType, { fieldSchema }] of Object.entries(model.body.docTypes || {})) { + // We can also programmatically access a schema of the fields. + console.log("-", docType, JSON.stringify(fieldSchema, undefined, 2)); + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/listModels.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/listModels.ts new file mode 100644 index 000000000000..54ed144b4548 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/listModels.ts @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to iterate over the models in a resource. This will include both custom and prebuilt models. 
+ * + * @summary iterate over the models in a resource + */ + +import DocumentIntelligence, { isUnexpected, paginate } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + + const response = await client.path("/documentModels").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + + for await (const model of paginate(client, response)) { + console.log("- ID", model.modelId); + console.log(" Created:", model.createdDateTime); + console.log(" Description: ", model.description || ""); + + // The model summary does not include `docTypes`, so we must additionally call `getModel` to retrieve them + const detailedModel = (await client.path("/documentModels/{modelId}", model.modelId).get()); + + if (isUnexpected(detailedModel)) { + throw detailedModel.body.error; + } + const docTypes = detailedModel.body.docTypes; + + console.log(" Document Types:"); + for (const docType of Object.keys(docTypes || {})) { + console.log(" -", docType); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/readDocument.ts b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/readDocument.ts new file mode 100644 index 000000000000..a20e007c7940 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/src/readDocument.ts @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This sample shows how to extract the text content of a document using the "prebuilt-read" model. 
+ * + * @summary use the prebuilt "read" model to extract information about the text content of a document + */ + +import DocumentIntelligence, { AnalyzeResultOperationOutput, getLongRunningPoller, isUnexpected } from "@azure-rest/ai-document-intelligence"; + +import * as dotenv from "dotenv"; +dotenv.config(); + +async function main() { + const client = DocumentIntelligence( + process.env["DOCUMENT_INTELLIGENCE_ENDPOINT"] || "", + { key: process.env["DOCUMENT_INTELLIGENCE_API_KEY"] || "" }) + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-read") + .post({ + contentType: "application/json", + body: { + urlSource: "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/forms/Invoice_1.pdf", + }, + queryParameters: { features: ["barcodes"] }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = await getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + + // The "prebuilt-read" model (`beginReadDocument` method) only extracts information about the textual content of the + // document, such as page text elements and information about the language of the text. 
+ const pages = analyzeResult?.pages; + const languages = analyzeResult?.languages; + const styles = analyzeResult?.styles; + + + if (!pages || pages.length <= 0) { + console.log("No pages were extracted from the document."); + } else { + console.log("Pages:"); + for (const page of pages) { + console.log("- Page", page.pageNumber, `(unit: ${page.unit})`); + console.log(` ${page.width}x${page.height}, angle: ${page.angle}`); + console.log( + ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words` + ); + + if (page.lines && page.lines.length > 0) { + console.log(" Lines:"); + + for (const line of page.lines) { + console.log(` - "${line.content}"`); + } + } + } + } + + if (!languages || languages.length <= 0) { + console.log("No language spans were extracted from the document."); + } else { + console.log("Languages:"); + for (const languageEntry of languages) { + console.log( + `- Found language: ${languageEntry.locale} (confidence: ${languageEntry.confidence})` + ); + } + } + + if (!styles || styles.length <= 0) { + console.log("No text styles were extracted from the document."); + } else { + console.log("Styles:"); + for (const style of styles) { + console.log( + `- Handwritten: ${style.isHandwritten ? 
"yes" : "no"} (confidence=${style.confidence})` + ); + } + } +} + +main().catch((error) => { + console.error("An error occurred:", error); + process.exit(1); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/tsconfig.json b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/tsconfig.json new file mode 100644 index 000000000000..e26ce2a6d8f7 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/samples/v1-beta/typescript/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "moduleResolution": "node", + "resolveJsonModule": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "alwaysStrict": true, + "outDir": "dist", + "rootDir": "src" + }, + "include": [ + "src/**.ts" + ] +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/clientDefinitions.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/clientDefinitions.ts new file mode 100644 index 000000000000..e2bdfebf6ff6 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/clientDefinitions.ts @@ -0,0 +1,290 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { + ListOperationsParameters, + GetDocumentModelBuildOperationParameters, + GetDocumentModelComposeOperationParameters, + GetDocumentModelCopyToOperationParameters, + GetDocumentClassifierBuildOperationParameters, + GetOperationParameters, + GetResourceInfoParameters, + GetAnalyzeResultParameters, + AnalyzeDocumentFromStreamParameters, + AnalyzeDocumentParameters, + GetModelParameters, + DeleteModelParameters, + BuildModelParameters, + ComposeModelParameters, + AuthorizeModelCopyParameters, + CopyModelToParameters, + ListModelsParameters, + BuildClassifierParameters, + ListClassifiersParameters, + GetClassifierParameters, + DeleteClassifierParameters, + ClassifyDocumentFromStreamParameters, + ClassifyDocumentParameters, + GetClassifyResultParameters, +} from "./parameters"; +import { + ListOperations200Response, + ListOperationsDefaultResponse, + GetDocumentModelBuildOperation200Response, + GetDocumentModelBuildOperationDefaultResponse, + GetDocumentModelComposeOperation200Response, + GetDocumentModelComposeOperationDefaultResponse, + GetDocumentModelCopyToOperation200Response, + GetDocumentModelCopyToOperationDefaultResponse, + GetDocumentClassifierBuildOperation200Response, + GetDocumentClassifierBuildOperationDefaultResponse, + GetOperation200Response, + GetOperationDefaultResponse, + GetResourceInfo200Response, + GetResourceInfoDefaultResponse, + GetAnalyzeResult200Response, + GetAnalyzeResultDefaultResponse, + AnalyzeDocumentFromStream202Response, + AnalyzeDocumentFromStreamDefaultResponse, + AnalyzeDocument202Response, + AnalyzeDocumentDefaultResponse, + GetModel200Response, + GetModelDefaultResponse, + DeleteModel204Response, + DeleteModelDefaultResponse, + BuildModel202Response, + BuildModelDefaultResponse, + ComposeModel202Response, + ComposeModelDefaultResponse, + AuthorizeModelCopy200Response, + AuthorizeModelCopyDefaultResponse, + CopyModelTo202Response, + CopyModelToDefaultResponse, + ListModels200Response, + ListModelsDefaultResponse, + 
BuildClassifier202Response, + BuildClassifierDefaultResponse, + ListClassifiers200Response, + ListClassifiersDefaultResponse, + GetClassifier200Response, + GetClassifierDefaultResponse, + DeleteClassifier204Response, + DeleteClassifierDefaultResponse, + ClassifyDocumentFromStream202Response, + ClassifyDocumentFromStreamDefaultResponse, + ClassifyDocument202Response, + ClassifyDocumentDefaultResponse, + GetClassifyResult200Response, + GetClassifyResultDefaultResponse, +} from "./responses"; +import { Client, StreamableMethod } from "@azure-rest/core-client"; + +export interface ListOperations { + /** Lists all operations. */ + get( + options?: ListOperationsParameters + ): StreamableMethod; +} + +export interface GetDocumentModelBuildOperation { + /** Gets operation info. */ + get( + options?: GetDocumentModelBuildOperationParameters + ): StreamableMethod< + GetDocumentModelBuildOperation200Response | GetDocumentModelBuildOperationDefaultResponse + >; + /** Gets operation info. */ + get( + options?: GetDocumentModelComposeOperationParameters + ): StreamableMethod< + GetDocumentModelComposeOperation200Response | GetDocumentModelComposeOperationDefaultResponse + >; + /** Gets operation info. */ + get( + options?: GetDocumentModelCopyToOperationParameters + ): StreamableMethod< + GetDocumentModelCopyToOperation200Response | GetDocumentModelCopyToOperationDefaultResponse + >; + /** Gets operation info. */ + get( + options?: GetDocumentClassifierBuildOperationParameters + ): StreamableMethod< + | GetDocumentClassifierBuildOperation200Response + | GetDocumentClassifierBuildOperationDefaultResponse + >; + /** Gets operation info. */ + get( + options?: GetOperationParameters + ): StreamableMethod; +} + +export interface GetResourceInfo { + /** Return information about the current resource. */ + get( + options?: GetResourceInfoParameters + ): StreamableMethod; +} + +export interface GetAnalyzeResult { + /** Gets the result of document analysis. 
*/ + get( + options?: GetAnalyzeResultParameters + ): StreamableMethod; +} + +export interface AnalyzeDocumentFromStream { + /** Analyzes document with document model. */ + post( + options: AnalyzeDocumentFromStreamParameters + ): StreamableMethod< + AnalyzeDocumentFromStream202Response | AnalyzeDocumentFromStreamDefaultResponse + >; + /** Analyzes document with document model. */ + post( + options: AnalyzeDocumentParameters + ): StreamableMethod; +} + +export interface GetModel { + /** Gets detailed document model information. */ + get( + options?: GetModelParameters + ): StreamableMethod; + /** Deletes document model. */ + delete( + options?: DeleteModelParameters + ): StreamableMethod; +} + +export interface BuildModel { + /** Builds a custom document analysis model. */ + post( + options: BuildModelParameters + ): StreamableMethod; +} + +export interface ComposeModel { + /** Creates a new document model from document types of existing document models. */ + post( + options: ComposeModelParameters + ): StreamableMethod; +} + +export interface AuthorizeModelCopy { + /** + * Generates authorization to copy a document model to this location with + * specified modelId and optional description. + */ + post( + options: AuthorizeModelCopyParameters + ): StreamableMethod; +} + +export interface CopyModelTo { + /** Copies document model to the target resource, region, and modelId. */ + post( + options: CopyModelToParameters + ): StreamableMethod; +} + +export interface ListModels { + /** List all document models */ + get( + options?: ListModelsParameters + ): StreamableMethod; +} + +export interface BuildClassifier { + /** Builds a custom document classifier. */ + post( + options: BuildClassifierParameters + ): StreamableMethod; +} + +export interface ListClassifiers { + /** List all document classifiers. */ + get( + options?: ListClassifiersParameters + ): StreamableMethod; +} + +export interface GetClassifier { + /** Gets detailed document classifier information. 
*/ + get( + options?: GetClassifierParameters + ): StreamableMethod; + /** Deletes document classifier. */ + delete( + options?: DeleteClassifierParameters + ): StreamableMethod; +} + +export interface ClassifyDocumentFromStream { + /** Classifies document with document classifier. */ + post( + options: ClassifyDocumentFromStreamParameters + ): StreamableMethod< + ClassifyDocumentFromStream202Response | ClassifyDocumentFromStreamDefaultResponse + >; + /** Classifies document with document classifier. */ + post( + options: ClassifyDocumentParameters + ): StreamableMethod; +} + +export interface GetClassifyResult { + /** Gets the result of document classifier. */ + get( + options?: GetClassifyResultParameters + ): StreamableMethod; +} + +export interface Routes { + /** Resource for '/operations' has methods for the following verbs: get */ + (path: "/operations"): ListOperations; + /** Resource for '/operations/\{operationId\}' has methods for the following verbs: get */ + (path: "/operations/{operationId}", operationId: string): GetDocumentModelBuildOperation; + /** Resource for '/info' has methods for the following verbs: get */ + (path: "/info"): GetResourceInfo; + /** Resource for '/documentModels/\{modelId\}/analyzeResults/\{resultId\}' has methods for the following verbs: get */ + ( + path: "/documentModels/{modelId}/analyzeResults/{resultId}", + modelId: string, + resultId: string + ): GetAnalyzeResult; + /** Resource for '/documentModels/\{modelId\}:analyze' has methods for the following verbs: post */ + (path: "/documentModels/{modelId}:analyze", modelId: string): AnalyzeDocumentFromStream; + /** Resource for '/documentModels/\{modelId\}' has methods for the following verbs: get, delete */ + (path: "/documentModels/{modelId}", modelId: string): GetModel; + /** Resource for '/documentModels:build' has methods for the following verbs: post */ + (path: "/documentModels:build"): BuildModel; + /** Resource for '/documentModels:compose' has methods for the 
following verbs: post */ + (path: "/documentModels:compose"): ComposeModel; + /** Resource for '/documentModels:authorizeCopy' has methods for the following verbs: post */ + (path: "/documentModels:authorizeCopy"): AuthorizeModelCopy; + /** Resource for '/documentModels/\{modelId\}:copyTo' has methods for the following verbs: post */ + (path: "/documentModels/{modelId}:copyTo", modelId: string): CopyModelTo; + /** Resource for '/documentModels' has methods for the following verbs: get */ + (path: "/documentModels"): ListModels; + /** Resource for '/documentClassifiers:build' has methods for the following verbs: post */ + (path: "/documentClassifiers:build"): BuildClassifier; + /** Resource for '/documentClassifiers' has methods for the following verbs: get */ + (path: "/documentClassifiers"): ListClassifiers; + /** Resource for '/documentClassifiers/\{classifierId\}' has methods for the following verbs: get, delete */ + (path: "/documentClassifiers/{classifierId}", classifierId: string): GetClassifier; + /** Resource for '/documentClassifiers/\{classifierId\}:analyze' has methods for the following verbs: post */ + ( + path: "/documentClassifiers/{classifierId}:analyze", + classifierId: string + ): ClassifyDocumentFromStream; + /** Resource for '/documentClassifiers/\{classifierId\}/analyzeResults/\{resultId\}' has methods for the following verbs: get */ + ( + path: "/documentClassifiers/{classifierId}/analyzeResults/{resultId}", + classifierId: string, + resultId: string + ): GetClassifyResult; +} + +export type DocumentIntelligenceClient = Client & { + path: Routes; +}; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/documentIntelligence.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/documentIntelligence.ts new file mode 100644 index 000000000000..70c2d7b79521 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/documentIntelligence.ts @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT license. + +import { getClient, ClientOptions } from "@azure-rest/core-client"; +import { logger } from "./logger"; +import { TokenCredential, KeyCredential } from "@azure/core-auth"; +import { DocumentIntelligenceClient } from "./clientDefinitions"; + +/** + * Initialize a new instance of `DocumentIntelligenceClient` + * @param endpoint - The Document Intelligence service endpoint. + * @param credentials - uniquely identify client credential + * @param options - the parameter for all optional parameters + */ +export default function createClient( + endpoint: string, + credentials: TokenCredential | KeyCredential, + options: ClientOptions = {} +): DocumentIntelligenceClient { + const baseUrl = options.baseUrl ?? `${endpoint}/documentintelligence`; + options.apiVersion = options.apiVersion ?? "2023-10-31-preview"; + const userAgentInfo = `azsdk-js-ai-document-intelligence-rest/1.0.0-beta.1`; + const userAgentPrefix = + options.userAgentOptions && options.userAgentOptions.userAgentPrefix + ? `${options.userAgentOptions.userAgentPrefix} ${userAgentInfo}` + : `${userAgentInfo}`; + options = { + ...options, + userAgentOptions: { + userAgentPrefix, + }, + loggingOptions: { + logger: options.loggingOptions?.logger ?? logger.info, + }, + credentials: { + scopes: options.credentials?.scopes ?? ["https://cognitiveservices.azure.com/.default"], + apiKeyHeaderName: options.credentials?.apiKeyHeaderName ?? "Ocp-Apim-Subscription-Key", + }, + }; + + const client = getClient(baseUrl, credentials, options) as DocumentIntelligenceClient; + + return client; +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/index.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/index.ts new file mode 100644 index 000000000000..7f944a9ffb8a --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/index.ts @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import DocumentIntelligence from "./documentIntelligence"; + +export * from "./documentIntelligence"; +export * from "./parameters"; +export * from "./responses"; +export * from "./clientDefinitions"; +export * from "./isUnexpected"; +export * from "./models"; +export * from "./outputModels"; +export * from "./paginateHelper"; +export * from "./pollingHelper"; + +export default DocumentIntelligence; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/isUnexpected.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/isUnexpected.ts new file mode 100644 index 000000000000..13867bac0d8d --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/isUnexpected.ts @@ -0,0 +1,278 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { + ListOperations200Response, + ListOperationsDefaultResponse, + GetDocumentModelBuildOperation200Response, + GetDocumentModelBuildOperationDefaultResponse, + GetResourceInfo200Response, + GetResourceInfoDefaultResponse, + GetAnalyzeResult200Response, + GetAnalyzeResultDefaultResponse, + AnalyzeDocumentFromStream202Response, + AnalyzeDocumentFromStreamLogicalResponse, + AnalyzeDocumentFromStreamDefaultResponse, + GetModel200Response, + GetModelDefaultResponse, + DeleteModel204Response, + DeleteModelDefaultResponse, + BuildModel202Response, + BuildModelLogicalResponse, + BuildModelDefaultResponse, + ComposeModel202Response, + ComposeModelLogicalResponse, + ComposeModelDefaultResponse, + AuthorizeModelCopy200Response, + AuthorizeModelCopyDefaultResponse, + CopyModelTo202Response, + CopyModelToLogicalResponse, + CopyModelToDefaultResponse, + ListModels200Response, + ListModelsDefaultResponse, + BuildClassifier202Response, + BuildClassifierLogicalResponse, + BuildClassifierDefaultResponse, + ListClassifiers200Response, + ListClassifiersDefaultResponse, + GetClassifier200Response, + GetClassifierDefaultResponse, + DeleteClassifier204Response, + 
DeleteClassifierDefaultResponse, + ClassifyDocumentFromStream202Response, + ClassifyDocumentFromStreamLogicalResponse, + ClassifyDocumentFromStreamDefaultResponse, + GetClassifyResult200Response, + GetClassifyResultDefaultResponse, +} from "./responses"; + +const responseMap: Record = { + "GET /operations": ["200"], + "GET /operations/{operationId}": ["200"], + "GET /info": ["200"], + "GET /documentModels/{modelId}/analyzeResults/{resultId}": ["200"], + "POST /documentModels/{modelId}:analyze": ["202"], + "GET /documentModels/{modelId}:analyze": ["200", "202"], + "GET /documentModels/{modelId}": ["200"], + "DELETE /documentModels/{modelId}": ["204"], + "POST /documentModels:build": ["202"], + "GET /documentModels:build": ["200", "202"], + "POST /documentModels:compose": ["202"], + "GET /documentModels:compose": ["200", "202"], + "POST /documentModels:authorizeCopy": ["200"], + "POST /documentModels/{modelId}:copyTo": ["202"], + "GET /documentModels/{modelId}:copyTo": ["200", "202"], + "GET /documentModels": ["200"], + "POST /documentClassifiers:build": ["202"], + "GET /documentClassifiers:build": ["200", "202"], + "GET /documentClassifiers": ["200"], + "GET /documentClassifiers/{classifierId}": ["200"], + "DELETE /documentClassifiers/{classifierId}": ["204"], + "POST /documentClassifiers/{classifierId}:analyze": ["202"], + "GET /documentClassifiers/{classifierId}:analyze": ["200", "202"], + "GET /documentClassifiers/{classifierId}/analyzeResults/{resultId}": ["200"], +}; + +export function isUnexpected( + response: ListOperations200Response | ListOperationsDefaultResponse +): response is ListOperationsDefaultResponse; +export function isUnexpected( + response: + | GetDocumentModelBuildOperation200Response + | GetDocumentModelBuildOperationDefaultResponse +): response is GetDocumentModelBuildOperationDefaultResponse; +export function isUnexpected( + response: GetResourceInfo200Response | GetResourceInfoDefaultResponse +): response is GetResourceInfoDefaultResponse; 
+export function isUnexpected( + response: GetAnalyzeResult200Response | GetAnalyzeResultDefaultResponse +): response is GetAnalyzeResultDefaultResponse; +export function isUnexpected( + response: + | AnalyzeDocumentFromStream202Response + | AnalyzeDocumentFromStreamLogicalResponse + | AnalyzeDocumentFromStreamDefaultResponse +): response is AnalyzeDocumentFromStreamDefaultResponse; +export function isUnexpected( + response: GetModel200Response | GetModelDefaultResponse +): response is GetModelDefaultResponse; +export function isUnexpected( + response: DeleteModel204Response | DeleteModelDefaultResponse +): response is DeleteModelDefaultResponse; +export function isUnexpected( + response: BuildModel202Response | BuildModelLogicalResponse | BuildModelDefaultResponse +): response is BuildModelDefaultResponse; +export function isUnexpected( + response: ComposeModel202Response | ComposeModelLogicalResponse | ComposeModelDefaultResponse +): response is ComposeModelDefaultResponse; +export function isUnexpected( + response: AuthorizeModelCopy200Response | AuthorizeModelCopyDefaultResponse +): response is AuthorizeModelCopyDefaultResponse; +export function isUnexpected( + response: CopyModelTo202Response | CopyModelToLogicalResponse | CopyModelToDefaultResponse +): response is CopyModelToDefaultResponse; +export function isUnexpected( + response: ListModels200Response | ListModelsDefaultResponse +): response is ListModelsDefaultResponse; +export function isUnexpected( + response: + | BuildClassifier202Response + | BuildClassifierLogicalResponse + | BuildClassifierDefaultResponse +): response is BuildClassifierDefaultResponse; +export function isUnexpected( + response: ListClassifiers200Response | ListClassifiersDefaultResponse +): response is ListClassifiersDefaultResponse; +export function isUnexpected( + response: GetClassifier200Response | GetClassifierDefaultResponse +): response is GetClassifierDefaultResponse; +export function isUnexpected( + response: 
DeleteClassifier204Response | DeleteClassifierDefaultResponse +): response is DeleteClassifierDefaultResponse; +export function isUnexpected( + response: + | ClassifyDocumentFromStream202Response + | ClassifyDocumentFromStreamLogicalResponse + | ClassifyDocumentFromStreamDefaultResponse +): response is ClassifyDocumentFromStreamDefaultResponse; +export function isUnexpected( + response: GetClassifyResult200Response | GetClassifyResultDefaultResponse +): response is GetClassifyResultDefaultResponse; +export function isUnexpected( + response: + | ListOperations200Response + | ListOperationsDefaultResponse + | GetDocumentModelBuildOperation200Response + | GetDocumentModelBuildOperationDefaultResponse + | GetResourceInfo200Response + | GetResourceInfoDefaultResponse + | GetAnalyzeResult200Response + | GetAnalyzeResultDefaultResponse + | AnalyzeDocumentFromStream202Response + | AnalyzeDocumentFromStreamLogicalResponse + | AnalyzeDocumentFromStreamDefaultResponse + | GetModel200Response + | GetModelDefaultResponse + | DeleteModel204Response + | DeleteModelDefaultResponse + | BuildModel202Response + | BuildModelLogicalResponse + | BuildModelDefaultResponse + | ComposeModel202Response + | ComposeModelLogicalResponse + | ComposeModelDefaultResponse + | AuthorizeModelCopy200Response + | AuthorizeModelCopyDefaultResponse + | CopyModelTo202Response + | CopyModelToLogicalResponse + | CopyModelToDefaultResponse + | ListModels200Response + | ListModelsDefaultResponse + | BuildClassifier202Response + | BuildClassifierLogicalResponse + | BuildClassifierDefaultResponse + | ListClassifiers200Response + | ListClassifiersDefaultResponse + | GetClassifier200Response + | GetClassifierDefaultResponse + | DeleteClassifier204Response + | DeleteClassifierDefaultResponse + | ClassifyDocumentFromStream202Response + | ClassifyDocumentFromStreamLogicalResponse + | ClassifyDocumentFromStreamDefaultResponse + | GetClassifyResult200Response + | GetClassifyResultDefaultResponse +): response is + | 
ListOperationsDefaultResponse + | GetDocumentModelBuildOperationDefaultResponse + | GetResourceInfoDefaultResponse + | GetAnalyzeResultDefaultResponse + | AnalyzeDocumentFromStreamDefaultResponse + | GetModelDefaultResponse + | DeleteModelDefaultResponse + | BuildModelDefaultResponse + | ComposeModelDefaultResponse + | AuthorizeModelCopyDefaultResponse + | CopyModelToDefaultResponse + | ListModelsDefaultResponse + | BuildClassifierDefaultResponse + | ListClassifiersDefaultResponse + | GetClassifierDefaultResponse + | DeleteClassifierDefaultResponse + | ClassifyDocumentFromStreamDefaultResponse + | GetClassifyResultDefaultResponse { + const lroOriginal = response.headers["x-ms-original-url"]; + const url = new URL(lroOriginal ?? response.request.url); + const method = response.request.method; + let pathDetails = responseMap[`${method} ${url.pathname}`]; + if (!pathDetails) { + pathDetails = getParametrizedPathSuccess(method, url.pathname); + } + return !pathDetails.includes(response.status); +} + +function getParametrizedPathSuccess(method: string, path: string): string[] { + const pathParts = path.split("/"); + + // Traverse list to match the longest candidate + // matchedLen: the length of candidate path + // matchedValue: the matched status code array + let matchedLen = -1, + matchedValue: string[] = []; + + // Iterate the responseMap to find a match + for (const [key, value] of Object.entries(responseMap)) { + // Extracting the path from the map key which is in format + // GET /path/foo + if (!key.startsWith(method)) { + continue; + } + const candidatePath = getPathFromMapKey(key); + // Get each part of the url path + const candidateParts = candidatePath.split("/"); + + // track if we have found a match to return the values found. 
+ let found = true; + for (let i = candidateParts.length - 1, j = pathParts.length - 1; i >= 1 && j >= 1; i--, j--) { + if (candidateParts[i]?.startsWith("{") && candidateParts[i]?.indexOf("}") !== -1) { + const start = candidateParts[i]!.indexOf("}") + 1, + end = candidateParts[i]?.length; + // If the current part of the candidate is a "template" part + // Try to use the suffix of pattern to match the path + // {guid} ==> $ + // {guid}:export ==> :export$ + const isMatched = new RegExp(`${candidateParts[i]?.slice(start, end)}`).test( + pathParts[j] || "" + ); + + if (!isMatched) { + found = false; + break; + } + continue; + } + + // If the candidate part is not a template and + // the parts don't match mark the candidate as not found + // to move on with the next candidate path. + if (candidateParts[i] !== pathParts[j]) { + found = false; + break; + } + } + + // We finished evaluating the current candidate parts + // Update the matched value if and only if we found the longer pattern + if (found && candidatePath.length > matchedLen) { + matchedLen = candidatePath.length; + matchedValue = value; + } + } + + return matchedValue; +} + +function getPathFromMapKey(mapKey: string): string { + const pathStart = mapKey.indexOf("/"); + return mapKey.slice(pathStart); +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/logger.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/logger.ts new file mode 100644 index 000000000000..0c700c8102c6 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/logger.ts @@ -0,0 +1,5 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { createClientLogger } from "@azure/logger"; +export const logger = createClientLogger("ai-document-intelligence"); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/models.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/models.ts new file mode 100644 index 000000000000..b6f908c9dcc8 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/models.ts @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** Azure Blob Storage content. */ +export interface AzureBlobContentSource { + /** Azure Blob Storage container URL. */ + containerUrl: string; + /** Blob name prefix. */ + prefix?: string; +} + +/** File list in Azure Blob Storage. */ +export interface AzureBlobFileListContentSource { + /** Azure Blob Storage container URL. */ + containerUrl: string; + /** Path to a JSONL file within the container specifying a subset of documents. */ + fileList: string; +} + +/** Classifier document type info. */ +export interface ClassifierDocumentTypeDetails { + /** + * Type of training data source. + * + * Possible values: url, base64, azureBlob, azureBlobFileList + */ + sourceKind?: string; + /** + * Azure Blob Storage location containing the training data for a classifier + * document type. Either azureBlobSource or azureBlobFileListSource must be + * specified. + */ + azureBlobSource?: AzureBlobContentSource; + /** + * Azure Blob Storage file list specifying the training data for a classifier + * document type. Either azureBlobSource or azureBlobFileListSource must be + * specified. + */ + azureBlobFileListSource?: AzureBlobFileListContentSource; +} + +/** Document analysis parameters. */ +export interface AnalyzeDocumentRequest { + /** Document URL to analyze. Either urlSource or base64Source must be specified. */ + urlSource?: string; + /** + * Base64 encoding of the document to analyze. Either urlSource or base64Source + * must be specified. 
+ */ + base64Source?: string; +} + +/** Request body to build a new custom document model. */ +export interface BuildDocumentModelRequest { + /** Unique document model name. */ + modelId: string; + /** Document model description. */ + description?: string; + /** + * Custom document model build mode. + * + * Possible values: template, neural + */ + buildMode: string; + /** + * Azure Blob Storage location containing the training data. Either + * azureBlobSource or azureBlobFileListSource must be specified. + */ + azureBlobSource?: AzureBlobContentSource; + /** + * Azure Blob Storage file list specifying the training data. Either + * azureBlobSource or azureBlobFileListSource must be specified. + */ + azureBlobFileListSource?: AzureBlobFileListContentSource; + /** List of key-value tag attributes associated with the document model. */ + tags?: Record; +} + +/** Request body to create a composed document model from component document models. */ +export interface ComposeDocumentModelRequest { + /** Unique document model name. */ + modelId: string; + /** Document model description. */ + description?: string; + /** List of component document models to compose. */ + componentModels: Array; + /** List of key-value tag attributes associated with the document model. */ + tags?: Record; +} + +/** A component of a composed document model. */ +export interface ComponentDocumentModelDetails { + /** Unique document model name. */ + modelId: string; +} + +/** Request body to authorize document model copy. */ +export interface AuthorizeCopyRequest { + /** Unique document model name. */ + modelId: string; + /** Document model description. */ + description?: string; + /** List of key-value tag attributes associated with the document model. */ + tags?: Record; +} + +/** + * Authorization to copy a document model to the specified target resource and + * modelId. + */ +export interface CopyAuthorization { + /** ID of the target Azure resource where the document model should be copied to. 
*/ + targetResourceId: string; + /** + * Location of the target Azure resource where the document model should be copied + * to. + */ + targetResourceRegion: string; + /** Identifier of the target document model. */ + targetModelId: string; + /** URL of the copied document model in the target account. */ + targetModelLocation: string; + /** Token used to authorize the request. */ + accessToken: string; + /** Date/time when the access token expires. */ + expirationDateTime: Date | string; +} + +/** Request body to build a new custom document classifier. */ +export interface BuildDocumentClassifierRequest { + /** Unique document classifier name. */ + classifierId: string; + /** Document classifier description. */ + description?: string; + /** List of document types to classify against. */ + docTypes: Record; +} + +/** Document classification parameters. */ +export interface ClassifyDocumentRequest { + /** Document URL to classify. Either urlSource or base64Source must be specified. */ + urlSource?: string; + /** + * Base64 encoding of the document to classify. Either urlSource or base64Source + * must be specified. + */ + base64Source?: string; +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/outputModels.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/outputModels.ts new file mode 100644 index 000000000000..fbe755aec611 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/outputModels.ts @@ -0,0 +1,824 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { Paged } from "@azure/core-paging"; + +/** Operation info. */ +export interface OperationDetailsOutputParent { + /** Operation ID */ + operationId: string; + /** + * Operation status. + * + * Possible values: notStarted, running, failed, succeeded, canceled + */ + status: string; + /** Operation progress (0-100). */ + percentCompleted?: number; + /** Date and time (UTC) when the operation was created. 
*/ + createdDateTime: string; + /** Date and time (UTC) when the status was last updated. */ + lastUpdatedDateTime: string; + /** URL of the resource targeted by this operation. */ + resourceLocation: string; + /** API version used to create this operation. */ + apiVersion?: string; + /** List of key-value tag attributes associated with the document model. */ + tags?: Record; + /** Encountered error. */ + error?: ErrorModelOutput; + kind: string; +} + +/** The error object. */ +export interface ErrorModelOutput { + /** One of a server-defined set of error codes. */ + code: string; + /** A human-readable representation of the error. */ + message: string; + /** The target of the error. */ + target?: string; + /** An array of details about specific errors that led to this reported error. */ + details?: Array; + /** An object containing more specific information than the current object about the error. */ + innererror?: InnerErrorOutput; +} + +/** An object containing more specific information about the error. */ +export interface InnerErrorOutput { + /** One of a server-defined set of error codes. */ + code?: string; + /** A human-readable representation of the error. */ + message?: string; + /** Inner error. */ + innererror?: InnerErrorOutput; +} + +/** Get Operation response object. */ +export interface DocumentModelBuildOperationDetailsOutput extends OperationDetailsOutputParent { + /** Operation result upon success. */ + result?: DocumentModelDetailsOutput; + /** Type of operation. */ + kind: "documentModelBuild"; +} + +/** Document model info. */ +export interface DocumentModelDetailsOutput { + /** Unique document model name. */ + modelId: string; + /** Document model description. */ + description?: string; + /** Date and time (UTC) when the document model was created. */ + createdDateTime: string; + /** Date and time (UTC) when the document model will expire. */ + expirationDateTime?: string; + /** API version used to create this document model. 
*/ + apiVersion?: string; + /** List of key-value tag attributes associated with the document model. */ + tags?: Record; + /** + * Custom document model build mode. + * + * Possible values: template, neural + */ + buildMode?: string; + /** + * Azure Blob Storage location containing the training data. Either + * azureBlobSource or azureBlobFileListSource must be specified. + */ + azureBlobSource?: AzureBlobContentSourceOutput; + /** + * Azure Blob Storage file list specifying the training data. Either + * azureBlobSource or azureBlobFileListSource must be specified. + */ + azureBlobFileListSource?: AzureBlobFileListContentSourceOutput; + /** Supported document types. */ + docTypes?: Record; +} + +/** Azure Blob Storage content. */ +export interface AzureBlobContentSourceOutput { + /** Azure Blob Storage container URL. */ + containerUrl: string; + /** Blob name prefix. */ + prefix?: string; +} + +/** File list in Azure Blob Storage. */ +export interface AzureBlobFileListContentSourceOutput { + /** Azure Blob Storage container URL. */ + containerUrl: string; + /** Path to a JSONL file within the container specifying a subset of documents. */ + fileList: string; +} + +/** Document type info. */ +export interface DocumentTypeDetailsOutput { + /** Document model description. */ + description?: string; + /** + * Custom document model build mode. + * + * Possible values: template, neural + */ + buildMode?: string; + /** Description of the document semantic schema using a JSON Schema style syntax. */ + fieldSchema: Record; + /** Estimated confidence for each field. */ + fieldConfidence?: Record; +} + +/** Description of the field semantic schema using a JSON Schema style syntax. */ +export interface DocumentFieldSchemaOutput { + /** + * Semantic data type of the field value. 
+ * + * Possible values: string, date, time, phoneNumber, number, integer, selectionMark, countryRegion, signature, array, object, currency, address, boolean + */ + type: string; + /** Field description. */ + description?: string; + /** Example field content. */ + example?: string; + /** Field type schema of each array element. */ + items?: DocumentFieldSchemaOutput; + /** Named sub-fields of the object field. */ + properties?: Record; +} + +/** Get Operation response object. */ +export interface DocumentModelComposeOperationDetailsOutput extends OperationDetailsOutputParent { + /** Operation result upon success. */ + result?: DocumentModelDetailsOutput; + /** Type of operation. */ + kind: "documentModelCompose"; +} + +/** Get Operation response object. */ +export interface DocumentModelCopyToOperationDetailsOutput extends OperationDetailsOutputParent { + /** Operation result upon success. */ + result?: DocumentModelDetailsOutput; + /** Type of operation. */ + kind: "documentModelCopyTo"; +} + +/** Get Operation response object. */ +export interface DocumentClassifierBuildOperationDetailsOutput + extends OperationDetailsOutputParent { + /** Operation result upon success. */ + result?: DocumentClassifierDetailsOutput; + /** Type of operation. */ + kind: "documentClassifierBuild"; +} + +/** Document classifier info. */ +export interface DocumentClassifierDetailsOutput { + /** Unique document classifier name. */ + classifierId: string; + /** Document classifier description. */ + description?: string; + /** Date and time (UTC) when the document classifier was created. */ + createdDateTime: string; + /** Date and time (UTC) when the document classifier will expire. */ + expirationDateTime?: string; + /** API version used to create this document classifier. */ + apiVersion: string; + /** List of document types to classify against. */ + docTypes: Record; +} + +/** Classifier document type info. 
*/ +export interface ClassifierDocumentTypeDetailsOutput { + /** + * Type of training data source. + * + * Possible values: url, base64, azureBlob, azureBlobFileList + */ + sourceKind?: string; + /** + * Azure Blob Storage location containing the training data for a classifier + * document type. Either azureBlobSource or azureBlobFileListSource must be + * specified. + */ + azureBlobSource?: AzureBlobContentSourceOutput; + /** + * Azure Blob Storage file list specifying the training data for a classifier + * document type. Either azureBlobSource or azureBlobFileListSource must be + * specified. + */ + azureBlobFileListSource?: AzureBlobFileListContentSourceOutput; +} + +/** Error response object. */ +export interface ErrorResponseOutput { + /** Error info. */ + error: ErrorModelOutput; +} + +/** General information regarding the current resource. */ +export interface ResourceDetailsOutput { + /** Details regarding custom document models. */ + customDocumentModels: CustomDocumentModelsDetailsOutput; + /** Quota used, limit, and next reset date/time. */ + customNeuralDocumentModelBuilds: QuotaDetailsOutput; +} + +/** Details regarding custom document models. */ +export interface CustomDocumentModelsDetailsOutput { + /** Number of custom document models in the current resource. */ + count: number; + /** Maximum number of custom document models supported in the current resource. */ + limit: number; +} + +/** Quota used, limit, and next reset date/time. */ +export interface QuotaDetailsOutput { + /** Amount of the resource quota used. */ + used: number; + /** Resource quota limit. */ + quota: number; + /** Date/time when the resource quota usage will be reset. */ + quotaResetDateTime: string; +} + +/** Status and result of the analyze operation. */ +export interface AnalyzeResultOperationOutput { + /** + * Operation status. 
+ * + * Possible values: notStarted, running, failed, succeeded, canceled + */ + status: string; + /** Date and time (UTC) when the analyze operation was submitted. */ + createdDateTime: string; + /** Date and time (UTC) when the status was last updated. */ + lastUpdatedDateTime: string; + /** Encountered error during document analysis. */ + error?: ErrorModelOutput; + /** Document analysis result. */ + analyzeResult?: AnalyzeResultOutput; +} + +/** Document analysis result. */ +export interface AnalyzeResultOutput { + /** API version used to produce this result. */ + apiVersion: string; + /** Document model ID used to produce this result. */ + modelId: string; + /** + * Method used to compute string offset and length. + * + * Possible values: textElements, unicodeCodePoint, utf16CodeUnit + */ + stringIndexType: string; + /** + * Format of the analyze result top-level content. + * + * Possible values: text, markdown + */ + contentFormat?: string; + /** + * Concatenate string representation of all textual and visual elements in reading + * order. + */ + content: string; + /** Analyzed pages. */ + pages: Array; + /** Extracted paragraphs. */ + paragraphs?: Array; + /** Extracted tables. */ + tables?: Array; + /** Extracted figures. */ + figures?: Array; + /** Extracted lists. */ + lists?: Array; + /** Extracted sections. */ + sections?: Array; + /** Extracted key-value pairs. */ + keyValuePairs?: Array; + /** Extracted font styles. */ + styles?: Array; + /** Detected languages. */ + languages?: Array; + /** Extracted documents. */ + documents?: Array; +} + +/** Content and layout elements extracted from a page from the input. */ +export interface DocumentPageOutput { + /** 1-based page number in the input document. */ + pageNumber: number; + /** + * The general orientation of the content in clockwise direction, measured in + * degrees between (-180, 180]. + */ + angle?: number; + /** The width of the image/PDF in pixels/inches, respectively. 
*/ + width?: number; + /** The height of the image/PDF in pixels/inches, respectively. */ + height?: number; + /** + * The unit used by the width, height, and polygon properties. For images, the + * unit is "pixel". For PDF, the unit is "inch". + * + * Possible values: pixel, inch + */ + unit?: string; + /** Location of the page in the reading order concatenated content. */ + spans: Array; + /** Extracted words from the page. */ + words?: Array; + /** Extracted selection marks from the page. */ + selectionMarks?: Array; + /** + * Extracted lines from the page, potentially containing both textual and visual + * elements. + */ + lines?: Array; + /** Extracted barcodes from the page. */ + barcodes?: Array; + /** Extracted formulas from the page. */ + formulas?: Array; +} + +/** + * Contiguous region of the concatenated content property, specified as an offset + * and length. + */ +export interface DocumentSpanOutput { + /** Zero-based index of the content represented by the span. */ + offset: number; + /** Number of characters in the content represented by the span. */ + length: number; +} + +/** + * A word object consisting of a contiguous sequence of characters. For non-space + * delimited languages, such as Chinese, Japanese, and Korean, each character is + * represented as its own word. + */ +export interface DocumentWordOutput { + /** Text content of the word. */ + content: string; + /** + * Bounding polygon of the word, with coordinates specified relative to the + * top-left of the page. The numbers represent the x, y values of the polygon + * vertices, clockwise from the left (-180 degrees inclusive) relative to the + * element orientation. + */ + polygon?: number[]; + /** Location of the word in the reading order concatenated content. */ + span: DocumentSpanOutput; + /** Confidence of correctly extracting the word. 
*/ + confidence: number; +} + +/** + * A selection mark object representing check boxes, radio buttons, and other + * elements indicating a selection. + */ +export interface DocumentSelectionMarkOutput { + /** + * State of the selection mark. + * + * Possible values: selected, unselected + */ + state: string; + /** + * Bounding polygon of the selection mark, with coordinates specified relative + * to the top-left of the page. The numbers represent the x, y values of the + * polygon vertices, clockwise from the left (-180 degrees inclusive) relative + * to the element orientation. + */ + polygon?: number[]; + /** Location of the selection mark in the reading order concatenated content. */ + span: DocumentSpanOutput; + /** Confidence of correctly extracting the selection mark. */ + confidence: number; +} + +/** + * A content line object consisting of an adjacent sequence of content elements, + * such as words and selection marks. + */ +export interface DocumentLineOutput { + /** Concatenated content of the contained elements in reading order. */ + content: string; + /** + * Bounding polygon of the line, with coordinates specified relative to the + * top-left of the page. The numbers represent the x, y values of the polygon + * vertices, clockwise from the left (-180 degrees inclusive) relative to the + * element orientation. + */ + polygon?: number[]; + /** Location of the line in the reading order concatenated content. */ + spans: Array; +} + +/** A barcode object. */ +export interface DocumentBarcodeOutput { + /** + * Barcode kind. + * + * Possible values: QRCode, PDF417, UPCA, UPCE, Code39, Code128, EAN8, EAN13, DataBar, Code93, Codabar, DataBarExpanded, ITF, MicroQRCode, Aztec, DataMatrix, MaxiCode + */ + kind: string; + /** Barcode value. */ + value: string; + /** + * Bounding polygon of the barcode, with coordinates specified relative to the + * top-left of the page. 
The numbers represent the x, y values of the polygon + * vertices, clockwise from the left (-180 degrees inclusive) relative to the + * element orientation. + */ + polygon?: number[]; + /** Location of the barcode in the reading order concatenated content. */ + span: DocumentSpanOutput; + /** Confidence of correctly extracting the barcode. */ + confidence: number; +} + +/** A formula object. */ +export interface DocumentFormulaOutput { + /** + * Formula kind. + * + * Possible values: inline, display + */ + kind: string; + /** LaTex expression describing the formula. */ + value: string; + /** + * Bounding polygon of the formula, with coordinates specified relative to the + * top-left of the page. The numbers represent the x, y values of the polygon + * vertices, clockwise from the left (-180 degrees inclusive) relative to the + * element orientation. + */ + polygon?: number[]; + /** Location of the formula in the reading order concatenated content. */ + span: DocumentSpanOutput; + /** Confidence of correctly extracting the formula. */ + confidence: number; +} + +/** + * A paragraph object consisting with contiguous lines generally with common + * alignment and spacing. + */ +export interface DocumentParagraphOutput { + /** + * Semantic role of the paragraph. + * + * Possible values: pageHeader, pageFooter, pageNumber, title, sectionHeading, footnote, formulaBlock + */ + role?: string; + /** Concatenated content of the paragraph in reading order. */ + content: string; + /** Bounding regions covering the paragraph. */ + boundingRegions?: Array; + /** Location of the paragraph in the reading order concatenated content. */ + spans: Array; +} + +/** Bounding polygon on a specific page of the input. */ +export interface BoundingRegionOutput { + /** 1-based page number of page containing the bounding region. */ + pageNumber: number; + /** + * Bounding polygon on the page, or the entire page if not specified. + * Coordinates specified relative to the top-left of the page. 
The numbers + * represent the x, y values of the polygon vertices, clockwise from the left + * (-180 degrees inclusive) relative to the element orientation. + */ + polygon: number[]; +} + +/** A table object consisting table cells arranged in a rectangular layout. */ +export interface DocumentTableOutput { + /** Number of rows in the table. */ + rowCount: number; + /** Number of columns in the table. */ + columnCount: number; + /** Cells contained within the table. */ + cells: Array; + /** Bounding regions covering the table. */ + boundingRegions?: Array; + /** Location of the table in the reading order concatenated content. */ + spans: Array; + /** Caption associated with the table. */ + caption?: DocumentCaptionOutput; + /** List of footnotes associated with the table. */ + footnotes?: Array; +} + +/** An object representing the location and content of a table cell. */ +export interface DocumentTableCellOutput { + /** + * Table cell kind. + * + * Possible values: content, rowHeader, columnHeader, stubHead, description + */ + kind?: string; + /** Row index of the cell. */ + rowIndex: number; + /** Column index of the cell. */ + columnIndex: number; + /** Number of rows spanned by this cell. */ + rowSpan?: number; + /** Number of columns spanned by this cell. */ + columnSpan?: number; + /** Concatenated content of the table cell in reading order. */ + content: string; + /** Bounding regions covering the table cell. */ + boundingRegions?: Array; + /** Location of the table cell in the reading order concatenated content. */ + spans: Array; + /** Child elements of the table cell. */ + elements?: string[]; +} + +/** A caption object describing a table or figure. */ +export interface DocumentCaptionOutput { + /** Content of the caption. */ + content: string; + /** Bounding regions covering the caption. */ + boundingRegions?: Array; + /** Location of the caption in the reading order concatenated content. */ + spans: Array; + /** Child elements of the caption. 
*/ + elements?: string[]; +} + +/** A footnote object describing a table or figure. */ +export interface DocumentFootnoteOutput { + /** Content of the footnote. */ + content: string; + /** Bounding regions covering the footnote. */ + boundingRegions?: Array; + /** Location of the footnote in the reading order concatenated content. */ + spans: Array; + /** Child elements of the footnote. */ + elements?: string[]; +} + +/** An object representing a figure in the document. */ +export interface DocumentFigureOutput { + /** Bounding regions covering the figure. */ + boundingRegions?: Array; + /** Location of the figure in the reading order concatenated content. */ + spans: Array; + /** Child elements of the figure, excluding any caption or footnotes. */ + elements?: string[]; + /** Caption associated with the figure. */ + caption?: DocumentCaptionOutput; + /** List of footnotes associated with the figure. */ + footnotes?: Array; +} + +/** An object representing a list in the document. */ +export interface DocumentListOutput { + /** Location of the list in the reading order concatenated content. */ + spans: Array; + /** Items in the list. */ + items: Array; +} + +/** An object representing a list item in the document. */ +export interface DocumentListItemOutput { + /** Level of the list item (1-indexed). */ + level: number; + /** Content of the list item. */ + content: string; + /** Bounding regions covering the list item. */ + boundingRegions?: Array; + /** Location of the list item in the reading order concatenated content. */ + spans: Array; + /** Child elements of the list item. */ + elements?: string[]; +} + +/** An object representing a section in the document. */ +export interface DocumentSectionOutput { + /** Location of the section in the reading order concatenated content. */ + spans: Array; + /** Child elements of the section. 
*/ + elements?: string[]; +} + +/** + * An object representing a form field with distinct field label (key) and field + * value (may be empty). + */ +export interface DocumentKeyValuePairOutput { + /** Field label of the key-value pair. */ + key: DocumentKeyValueElementOutput; + /** Field value of the key-value pair. */ + value?: DocumentKeyValueElementOutput; + /** Confidence of correctly extracting the key-value pair. */ + confidence: number; +} + +/** An object representing the field key or value in a key-value pair. */ +export interface DocumentKeyValueElementOutput { + /** Concatenated content of the key-value element in reading order. */ + content: string; + /** Bounding regions covering the key-value element. */ + boundingRegions?: Array; + /** Location of the key-value element in the reading order concatenated content. */ + spans: Array; +} + +/** An object representing observed text styles. */ +export interface DocumentStyleOutput { + /** Is content handwritten? */ + isHandwritten?: boolean; + /** + * Visually most similar font from among the set of supported font families, with + * fallback fonts following CSS convention (ex. 'Arial, sans-serif'). + */ + similarFontFamily?: string; + /** + * Font style. + * + * Possible values: normal, italic + */ + fontStyle?: string; + /** + * Font weight. + * + * Possible values: normal, bold + */ + fontWeight?: string; + /** Foreground color in #rrggbb hexadecimal format. */ + color?: string; + /** Background color in #rrggbb hexadecimal format.. */ + backgroundColor?: string; + /** Location of the text elements in the concatenated content the style applies to. */ + spans: Array; + /** Confidence of correctly identifying the style. */ + confidence: number; +} + +/** An object representing the detected language for a given text span. */ +export interface DocumentLanguageOutput { + /** + * Detected language. Value may an ISO 639-1 language code (ex. "en", "fr") + * or BCP 47 language tag (ex. "zh-Hans"). 
+ */ + locale: string; + /** + * Location of the text elements in the concatenated content the language applies + * to. + */ + spans: Array; + /** Confidence of correctly identifying the language. */ + confidence: number; +} + +/** An object describing the location and semantic content of a document. */ +export interface DocumentOutput { + /** Document type. */ + docType: string; + /** Bounding regions covering the document. */ + boundingRegions?: Array; + /** Location of the document in the reading order concatenated content. */ + spans: Array; + /** Dictionary of named field values. */ + fields?: Record; + /** Confidence of correctly extracting the document. */ + confidence: number; +} + +/** An object representing the content and location of a field value. */ +export interface DocumentFieldOutput { + /** + * Data type of the field value. + * + * Possible values: string, date, time, phoneNumber, number, integer, selectionMark, countryRegion, signature, array, object, currency, address, boolean + */ + type: string; + /** String value. */ + valueString?: string; + /** Date value in YYYY-MM-DD format (ISO 8601). */ + valueDate?: string; + /** Time value in hh:mm:ss format (ISO 8601). */ + valueTime?: string; + /** Phone number value in E.164 format (ex. +19876543210). */ + valuePhoneNumber?: string; + /** Floating point value. */ + valueNumber?: number; + /** Integer value. */ + valueInteger?: number; + /** + * Selection mark value. + * + * Possible values: selected, unselected + */ + valueSelectionMark?: string; + /** + * Presence of signature. + * + * Possible values: signed, unsigned + */ + valueSignature?: string; + /** 3-letter country code value (ISO 3166-1 alpha-3). */ + valueCountryRegion?: string; + /** Array of field values. */ + valueArray?: Array; + /** Dictionary of named field values. */ + valueObject?: Record; + /** Currency value. */ + valueCurrency?: CurrencyValueOutput; + /** Address value. 
*/ + valueAddress?: AddressValueOutput; + /** Boolean value. */ + valueBoolean?: boolean; + /** Field content. */ + content?: string; + /** Bounding regions covering the field. */ + boundingRegions?: Array; + /** Location of the field in the reading order concatenated content. */ + spans?: Array; + /** Confidence of correctly extracting the field. */ + confidence?: number; +} + +/** Currency field value. */ +export interface CurrencyValueOutput { + /** Currency amount. */ + amount: number; + /** Currency symbol label, if any. */ + currencySymbol?: string; + /** Resolved currency code (ISO 4217), if any. */ + currencyCode?: string; +} + +/** Address field value. */ +export interface AddressValueOutput { + /** House or building number. */ + houseNumber?: string; + /** Post office box number. */ + poBox?: string; + /** Street name. */ + road?: string; + /** Name of city, town, village, etc. */ + city?: string; + /** First-level administrative division. */ + state?: string; + /** Postal code used for mail sorting. */ + postalCode?: string; + /** Country/region. */ + countryRegion?: string; + /** Street-level address, excluding city, state, countryRegion, and postalCode. */ + streetAddress?: string; + /** Apartment or office number */ + unit?: string; + /** + * Districts or boroughs within a city, such as Brooklyn in New York City or City + * of Westminster in London. + */ + cityDistrict?: string; + /** Second-level administrative division used in certain locales. */ + stateDistrict?: string; + /** Unofficial neighborhood name, like Chinatown. */ + suburb?: string; + /** Build name, such as World Trade Center. */ + house?: string; + /** Floor number, such as 3F. */ + level?: string; +} + +/** + * Authorization to copy a document model to the specified target resource and + * modelId. + */ +export interface CopyAuthorizationOutput { + /** ID of the target Azure resource where the document model should be copied to. 
*/ + targetResourceId: string; + /** + * Location of the target Azure resource where the document model should be copied + * to. + */ + targetResourceRegion: string; + /** Identifier of the target document model. */ + targetModelId: string; + /** URL of the copied document model in the target account. */ + targetModelLocation: string; + /** Token used to authorize the request. */ + accessToken: string; + /** Date/time when the access token expires. */ + expirationDateTime: string; +} + +/** Operation info. */ +export type OperationDetailsOutput = + | DocumentModelBuildOperationDetailsOutput + | DocumentModelComposeOperationDetailsOutput + | DocumentModelCopyToOperationDetailsOutput + | DocumentClassifierBuildOperationDetailsOutput; +/** Paged collection of OperationDetails items */ +export type PagedOperationDetailsOutput = Paged; +/** Paged collection of DocumentModelDetails items */ +export type PagedDocumentModelDetailsOutput = Paged; +/** Paged collection of DocumentClassifierDetails items */ +export type PagedDocumentClassifierDetailsOutput = Paged; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/paginateHelper.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/paginateHelper.ts new file mode 100644 index 000000000000..477bace4a37f --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/paginateHelper.ts @@ -0,0 +1,131 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { getPagedAsyncIterator, PagedAsyncIterableIterator, PagedResult } from "@azure/core-paging"; +import { Client, createRestError, PathUncheckedResponse } from "@azure-rest/core-client"; + +/** + * Helper type to extract the type of an array + */ +export type GetArrayType = T extends Array ? TData : never; + +/** + * The type of a custom function that defines how to get a page and a link to the next one if any. 
+ */ +export type GetPage = ( + pageLink: string, + maxPageSize?: number +) => Promise<{ + page: TPage; + nextPageLink?: string; +}>; + +/** + * Options for the paging helper + */ +export interface PagingOptions { + /** + * Custom function to extract pagination details for crating the PagedAsyncIterableIterator + */ + customGetPage?: GetPage[]>; +} + +/** + * Helper type to infer the Type of the paged elements from the response type + * This type is generated based on the swagger information for x-ms-pageable + * specifically on the itemName property which indicates the property of the response + * where the page items are found. The default value is `value`. + * This type will allow us to provide strongly typed Iterator based on the response we get as second parameter + */ +export type PaginateReturn = TResult extends { + body: { value?: infer TPage }; +} + ? GetArrayType + : Array; + +/** + * Helper to paginate results from an initial response that follows the specification of Autorest `x-ms-pageable` extension + * @param client - Client to use for sending the next page requests + * @param initialResponse - Initial response containing the nextLink and current page of elements + * @param customGetPage - Optional - Function to define how to extract the page and next link to be used to paginate the results + * @returns - PagedAsyncIterableIterator to iterate the elements + */ +export function paginate( + client: Client, + initialResponse: TResponse, + options: PagingOptions = {} +): PagedAsyncIterableIterator> { + // Extract element type from initial response + type TElement = PaginateReturn; + let firstRun = true; + const itemName = "value"; + const nextLinkName = "nextLink"; + const { customGetPage } = options; + const pagedResult: PagedResult = { + firstPageLink: "", + getPage: + typeof customGetPage === "function" + ? customGetPage + : async (pageLink: string) => { + const result = firstRun ? 
initialResponse : await client.pathUnchecked(pageLink).get(); + firstRun = false; + checkPagingRequest(result); + const nextLink = getNextLink(result.body, nextLinkName); + const values = getElements(result.body, itemName); + return { + page: values, + nextPageLink: nextLink, + }; + }, + }; + + return getPagedAsyncIterator(pagedResult); +} + +/** + * Gets for the value of nextLink in the body + */ +function getNextLink(body: unknown, nextLinkName?: string): string | undefined { + if (!nextLinkName) { + return undefined; + } + + const nextLink = (body as Record)[nextLinkName]; + + if (typeof nextLink !== "string" && typeof nextLink !== "undefined") { + throw new Error(`Body Property ${nextLinkName} should be a string or undefined`); + } + + return nextLink; +} + +/** + * Gets the elements of the current request in the body. + */ +function getElements(body: unknown, itemName: string): T[] { + const value = (body as Record)[itemName] as T[]; + + // value has to be an array according to the x-ms-pageable extension. + // The fact that this must be an array is used above to calculate the + // type of elements in the page in PaginateReturn + if (!Array.isArray(value)) { + throw new Error( + `Couldn't paginate response\n Body doesn't contain an array property with name: ${itemName}` + ); + } + + return value ?? 
[]; +} + +/** + * Checks if a request failed + */ +function checkPagingRequest(response: PathUncheckedResponse): void { + const Http2xxStatusCodes = ["200", "201", "202", "203", "204", "205", "206", "207", "208", "226"]; + if (!Http2xxStatusCodes.includes(response.status)) { + throw createRestError( + `Pagination failed with unexpected statusCode ${response.status}`, + response + ); + } +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/parameters.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/parameters.ts new file mode 100644 index 000000000000..b484d8f70299 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/parameters.ts @@ -0,0 +1,252 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { RequestParameters } from "@azure-rest/core-client"; +import { + AnalyzeDocumentRequest, + BuildDocumentModelRequest, + ComposeDocumentModelRequest, + AuthorizeCopyRequest, + CopyAuthorization, + BuildDocumentClassifierRequest, + ClassifyDocumentRequest, +} from "./models"; + +export type ListOperationsParameters = RequestParameters; +export type GetDocumentModelBuildOperationParameters = RequestParameters; +export type GetDocumentModelComposeOperationParameters = RequestParameters; +export type GetDocumentModelCopyToOperationParameters = RequestParameters; +export type GetDocumentClassifierBuildOperationParameters = RequestParameters; +export type GetOperationParameters = RequestParameters; +export type GetResourceInfoParameters = RequestParameters; +export type GetAnalyzeResultParameters = RequestParameters; + +export interface AnalyzeDocumentFromStreamBodyParam { + /** + * Input content. + * + * Value may contain any sequence of octets + */ + body: string | Uint8Array | ReadableStream | NodeJS.ReadableStream; +} + +export interface AnalyzeDocumentFromStreamQueryParamProperties { + /** List of 1-based page numbers to analyze. Ex. 
"1-3,5,7-9" */ + pages?: string; + /** + * Locale hint for text recognition and document analysis. Value may contain only + * the language code (ex. "en", "fr") or BCP 47 language tag (ex. "en-US"). + */ + locale?: string; + /** + * Method used to compute string offset and length. + * + * Possible values: textElements, unicodeCodePoint, utf16CodeUnit + */ + stringIndexType?: string; + /** List of optional analysis features. */ + features?: string[]; + /** List of additional fields to extract. Ex. "NumberOfGuests,StoreNumber" */ + queryFields?: string[]; + /** + * Format of the analyze result top-level content. + * + * Possible values: text, markdown + */ + outputContentFormat?: string; +} + +export interface AnalyzeDocumentFromStreamQueryParam { + queryParameters?: AnalyzeDocumentFromStreamQueryParamProperties; +} + +export interface AnalyzeDocumentFromStreamMediaTypesParam { + /** Input content type. */ + contentType: + | "application/octet-stream" + | "application/pdf" + | "image/jpeg" + | "image/png" + | "image/tiff" + | "image/bmp" + | "image/heif" + | "text/html" + | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + | "application/vnd.openxmlformats-officedocument.presentationml.presentation"; +} + +export type AnalyzeDocumentFromStreamParameters = AnalyzeDocumentFromStreamQueryParam & + AnalyzeDocumentFromStreamMediaTypesParam & + AnalyzeDocumentFromStreamBodyParam & + RequestParameters; + +export interface AnalyzeDocumentBodyParam { + /** Analyze request parameters. */ + body?: AnalyzeDocumentRequest; +} + +export interface AnalyzeDocumentQueryParamProperties { + /** List of 1-based page numbers to analyze. Ex. "1-3,5,7-9" */ + pages?: string; + /** + * Locale hint for text recognition and document analysis. Value may contain only + * the language code (ex. "en", "fr") or BCP 47 language tag (ex. "en-US"). 
+ */ + locale?: string; + /** + * Method used to compute string offset and length. + * + * Possible values: textElements, unicodeCodePoint, utf16CodeUnit + */ + stringIndexType?: string; + /** List of optional analysis features. */ + features?: string[]; + /** List of additional fields to extract. Ex. "NumberOfGuests,StoreNumber" */ + queryFields?: string[]; + /** + * Format of the analyze result top-level content. + * + * Possible values: text, markdown + */ + outputContentFormat?: string; +} + +export interface AnalyzeDocumentQueryParam { + queryParameters?: AnalyzeDocumentQueryParamProperties; +} + +export interface AnalyzeDocumentMediaTypesParam { + /** Input content type */ + contentType: "application/json"; +} + +export type AnalyzeDocumentParameters = AnalyzeDocumentQueryParam & + AnalyzeDocumentMediaTypesParam & + AnalyzeDocumentBodyParam & + RequestParameters; +export type GetModelParameters = RequestParameters; + +export interface BuildModelBodyParam { + /** Build request parameters. */ + body: BuildDocumentModelRequest; +} + +export type BuildModelParameters = BuildModelBodyParam & RequestParameters; + +export interface ComposeModelBodyParam { + /** Compose request parameters. */ + body: ComposeDocumentModelRequest; +} + +export type ComposeModelParameters = ComposeModelBodyParam & RequestParameters; + +export interface AuthorizeModelCopyBodyParam { + /** Authorize copy request parameters. */ + body: AuthorizeCopyRequest; +} + +export type AuthorizeModelCopyParameters = AuthorizeModelCopyBodyParam & RequestParameters; + +export interface CopyModelToBodyParam { + /** Copy to request parameters. */ + body: CopyAuthorization; +} + +export type CopyModelToParameters = CopyModelToBodyParam & RequestParameters; +export type ListModelsParameters = RequestParameters; +export type DeleteModelParameters = RequestParameters; + +export interface BuildClassifierBodyParam { + /** Build request parameters. 
*/ + body: BuildDocumentClassifierRequest; +} + +export type BuildClassifierParameters = BuildClassifierBodyParam & RequestParameters; +export type ListClassifiersParameters = RequestParameters; +export type GetClassifierParameters = RequestParameters; +export type DeleteClassifierParameters = RequestParameters; + +export interface ClassifyDocumentFromStreamBodyParam { + /** + * Input content. + * + * Value may contain any sequence of octets + */ + body: string | Uint8Array | ReadableStream | NodeJS.ReadableStream; +} + +export interface ClassifyDocumentFromStreamQueryParamProperties { + /** + * Method used to compute string offset and length. + * + * Possible values: textElements, unicodeCodePoint, utf16CodeUnit + */ + stringIndexType?: string; + /** + * Document splitting mode. + * + * Possible values: auto, none, perPage + */ + split?: string; +} + +export interface ClassifyDocumentFromStreamQueryParam { + queryParameters?: ClassifyDocumentFromStreamQueryParamProperties; +} + +export interface ClassifyDocumentFromStreamMediaTypesParam { + /** Input content type. */ + contentType: + | "application/octet-stream" + | "application/pdf" + | "image/jpeg" + | "image/png" + | "image/tiff" + | "image/bmp" + | "image/heif" + | "text/html" + | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + | "application/vnd.openxmlformats-officedocument.presentationml.presentation"; +} + +export type ClassifyDocumentFromStreamParameters = ClassifyDocumentFromStreamQueryParam & + ClassifyDocumentFromStreamMediaTypesParam & + ClassifyDocumentFromStreamBodyParam & + RequestParameters; + +export interface ClassifyDocumentBodyParam { + /** Classify request parameters. */ + body: ClassifyDocumentRequest; +} + +export interface ClassifyDocumentQueryParamProperties { + /** + * Method used to compute string offset and length. 
+ * + * Possible values: textElements, unicodeCodePoint, utf16CodeUnit + */ + stringIndexType?: string; + /** + * Document splitting mode. + * + * Possible values: auto, none, perPage + */ + split?: string; +} + +export interface ClassifyDocumentQueryParam { + queryParameters?: ClassifyDocumentQueryParamProperties; +} + +export interface ClassifyDocumentMediaTypesParam { + /** Input content type */ + contentType: "application/json"; +} + +export type ClassifyDocumentParameters = ClassifyDocumentQueryParam & + ClassifyDocumentMediaTypesParam & + ClassifyDocumentBodyParam & + RequestParameters; +export type GetClassifyResultParameters = RequestParameters; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/pollingHelper.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/pollingHelper.ts new file mode 100644 index 000000000000..20c5c3ff916f --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/pollingHelper.ts @@ -0,0 +1,136 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { Client, HttpResponse } from "@azure-rest/core-client"; +import { + CreateHttpPollerOptions, + LongRunningOperation, + LroResponse, + OperationState, + SimplePollerLike, + createHttpPoller, +} from "@azure/core-lro"; +import { + AnalyzeDocumentFromStream202Response, + AnalyzeDocumentFromStreamDefaultResponse, + AnalyzeDocumentFromStreamLogicalResponse, + BuildModel202Response, + BuildModelDefaultResponse, + BuildModelLogicalResponse, + ComposeModel202Response, + ComposeModelDefaultResponse, + ComposeModelLogicalResponse, + CopyModelTo202Response, + CopyModelToDefaultResponse, + CopyModelToLogicalResponse, + BuildClassifier202Response, + BuildClassifierDefaultResponse, + BuildClassifierLogicalResponse, + ClassifyDocumentFromStream202Response, + ClassifyDocumentFromStreamDefaultResponse, + ClassifyDocumentFromStreamLogicalResponse, +} from "./responses"; +/** + * Helper function that builds a Poller object to help polling a long running operation. + * @param client - Client to use for sending the request to get additional pages. + * @param initialResponse - The initial response. + * @param options - Options to set a resume state or custom polling interval. + * @returns - A poller object to poll for operation state updates and eventually get the final response. 
+ */ +export async function getLongRunningPoller< + TResult extends BuildModelLogicalResponse | BuildModelDefaultResponse +>( + client: Client, + initialResponse: BuildModel202Response | BuildModelDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller< + TResult extends ComposeModelLogicalResponse | ComposeModelDefaultResponse +>( + client: Client, + initialResponse: ComposeModel202Response | ComposeModelDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller< + TResult extends CopyModelToLogicalResponse | CopyModelToDefaultResponse +>( + client: Client, + initialResponse: CopyModelTo202Response | CopyModelToDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller< + TResult extends BuildClassifierLogicalResponse | BuildClassifierDefaultResponse +>( + client: Client, + initialResponse: BuildClassifier202Response | BuildClassifierDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller< + TResult extends + | AnalyzeDocumentFromStreamLogicalResponse + | AnalyzeDocumentFromStreamDefaultResponse +>( + client: Client, + initialResponse: AnalyzeDocumentFromStream202Response | AnalyzeDocumentFromStreamDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller< + TResult extends + | ClassifyDocumentFromStreamLogicalResponse + | ClassifyDocumentFromStreamDefaultResponse +>( + client: Client, + initialResponse: + | ClassifyDocumentFromStream202Response + | ClassifyDocumentFromStreamDefaultResponse, + options?: CreateHttpPollerOptions> +): Promise, TResult>>; +export async function getLongRunningPoller( + client: Client, + initialResponse: TResult, + options: CreateHttpPollerOptions> = {} +): Promise, TResult>> { + const poller: LongRunningOperation = { + 
requestMethod: initialResponse.request.method, + requestPath: initialResponse.request.url, + sendInitialRequest: async () => { + // In the case of Rest Clients we are building the LRO poller object from a response that's the reason + // we are not triggering the initial request here, just extracting the information from the + // response we were provided. + return getLroResponse(initialResponse); + }, + sendPollRequest: async (path) => { + // This is the callback that is going to be called to poll the service + // to get the latest status. We use the client provided and the polling path + // which is an opaque URL provided by caller, the service sends this in one of the following headers: operation-location, azure-asyncoperation or location + // depending on the lro pattern that the service implements. If non is provided we default to the initial path. + const response = await client.pathUnchecked(path ?? initialResponse.request.url).get(); + const lroResponse = getLroResponse(response as TResult); + lroResponse.rawResponse.headers["x-ms-original-url"] = initialResponse.request.url; + return lroResponse; + }, + }; + + options.resolveOnUnsuccessful = options.resolveOnUnsuccessful ?? true; + return createHttpPoller(poller, options); +} + +/** + * Converts a Rest Client response to a response that the LRO implementation understands + * @param response - a rest client http response + * @returns - An LRO response that the LRO implementation understands + */ +function getLroResponse(response: TResult): LroResponse { + if (Number.isNaN(response.status)) { + throw new TypeError(`Status code of the response is not a number. 
Value: ${response.status}`); + } + + return { + flatResponse: response, + rawResponse: { + ...response, + statusCode: Number.parseInt(response.status), + body: response.body, + }, + }; +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/src/responses.ts b/sdk/documentintelligence/ai-document-intelligence-rest/src/responses.ts new file mode 100644 index 000000000000..804da826a259 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/src/responses.ts @@ -0,0 +1,355 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { RawHttpHeaders } from "@azure/core-rest-pipeline"; +import { HttpResponse } from "@azure-rest/core-client"; +import { + PagedOperationDetailsOutput, + ErrorResponseOutput, + DocumentModelBuildOperationDetailsOutput, + DocumentModelComposeOperationDetailsOutput, + DocumentModelCopyToOperationDetailsOutput, + DocumentClassifierBuildOperationDetailsOutput, + OperationDetailsOutput, + ResourceDetailsOutput, + AnalyzeResultOperationOutput, + DocumentModelDetailsOutput, + CopyAuthorizationOutput, + PagedDocumentModelDetailsOutput, + PagedDocumentClassifierDetailsOutput, + DocumentClassifierDetailsOutput, +} from "./outputModels"; + +/** The request has succeeded. */ +export interface ListOperations200Response extends HttpResponse { + status: "200"; + body: PagedOperationDetailsOutput; +} + +export interface ListOperationsDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetDocumentModelBuildOperation200Response extends HttpResponse { + status: "200"; + body: DocumentModelBuildOperationDetailsOutput; +} + +export interface GetDocumentModelBuildOperationDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. 
*/ +export interface GetDocumentModelComposeOperation200Response extends HttpResponse { + status: "200"; + body: DocumentModelComposeOperationDetailsOutput; +} + +export interface GetDocumentModelComposeOperationDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetDocumentModelCopyToOperation200Response extends HttpResponse { + status: "200"; + body: DocumentModelCopyToOperationDetailsOutput; +} + +export interface GetDocumentModelCopyToOperationDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetDocumentClassifierBuildOperation200Response extends HttpResponse { + status: "200"; + body: DocumentClassifierBuildOperationDetailsOutput; +} + +export interface GetDocumentClassifierBuildOperationDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetOperation200Response extends HttpResponse { + status: "200"; + body: OperationDetailsOutput; +} + +export interface GetOperationDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetResourceInfo200Response extends HttpResponse { + status: "200"; + body: ResourceDetailsOutput; +} + +export interface GetResourceInfoDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. 
*/ +export interface GetAnalyzeResult200Response extends HttpResponse { + status: "200"; + body: AnalyzeResultOperationOutput; +} + +export interface GetAnalyzeResultDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +export interface AnalyzeDocumentFromStream202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface AnalyzeDocumentFromStream202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & AnalyzeDocumentFromStream202Headers; +} + +export interface AnalyzeDocumentFromStreamDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running analyzeDocumentFromStream operation */ +export interface AnalyzeDocumentFromStreamLogicalResponse extends HttpResponse { + status: "200"; +} + +export interface AnalyzeDocument202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface AnalyzeDocument202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & AnalyzeDocument202Headers; +} + +export interface AnalyzeDocumentDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running analyzeDocument operation */ +export interface AnalyzeDocumentLogicalResponse extends HttpResponse { + status: "200"; +} + +/** The request has succeeded. */ +export interface GetModel200Response extends HttpResponse { + status: "200"; + body: DocumentModelDetailsOutput; +} + +export interface GetModelDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +export interface BuildModel202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. 
*/ +export interface BuildModel202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & BuildModel202Headers; +} + +export interface BuildModelDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running buildModel operation */ +export interface BuildModelLogicalResponse extends HttpResponse { + status: "200"; +} + +export interface ComposeModel202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface ComposeModel202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & ComposeModel202Headers; +} + +export interface ComposeModelDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running composeModel operation */ +export interface ComposeModelLogicalResponse extends HttpResponse { + status: "200"; +} + +/** The request has succeeded. */ +export interface AuthorizeModelCopy200Response extends HttpResponse { + status: "200"; + body: CopyAuthorizationOutput; +} + +export interface AuthorizeModelCopyDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +export interface CopyModelTo202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface CopyModelTo202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & CopyModelTo202Headers; +} + +export interface CopyModelToDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running copyModelTo operation */ +export interface CopyModelToLogicalResponse extends HttpResponse { + status: "200"; +} + +/** The request has succeeded. 
*/ +export interface ListModels200Response extends HttpResponse { + status: "200"; + body: PagedDocumentModelDetailsOutput; +} + +export interface ListModelsDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** There is no content to send for this request, but the headers may be useful. */ +export interface DeleteModel204Response extends HttpResponse { + status: "204"; +} + +export interface DeleteModelDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +export interface BuildClassifier202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface BuildClassifier202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & BuildClassifier202Headers; +} + +export interface BuildClassifierDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running buildClassifier operation */ +export interface BuildClassifierLogicalResponse extends HttpResponse { + status: "200"; +} + +/** The request has succeeded. */ +export interface ListClassifiers200Response extends HttpResponse { + status: "200"; + body: PagedDocumentClassifierDetailsOutput; +} + +export interface ListClassifiersDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The request has succeeded. */ +export interface GetClassifier200Response extends HttpResponse { + status: "200"; + body: DocumentClassifierDetailsOutput; +} + +export interface GetClassifierDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** There is no content to send for this request, but the headers may be useful. 
*/ +export interface DeleteClassifier204Response extends HttpResponse { + status: "204"; +} + +export interface DeleteClassifierDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +export interface ClassifyDocumentFromStream202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface ClassifyDocumentFromStream202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & ClassifyDocumentFromStream202Headers; +} + +export interface ClassifyDocumentFromStreamDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running classifyDocumentFromStream operation */ +export interface ClassifyDocumentFromStreamLogicalResponse extends HttpResponse { + status: "200"; +} + +export interface ClassifyDocument202Headers { + "operation-location": string; +} + +/** The request has been accepted for processing, but processing has not yet completed. */ +export interface ClassifyDocument202Response extends HttpResponse { + status: "202"; + headers: RawHttpHeaders & ClassifyDocument202Headers; +} + +export interface ClassifyDocumentDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} + +/** The final response for long-running classifyDocument operation */ +export interface ClassifyDocumentLogicalResponse extends HttpResponse { + status: "200"; +} + +/** The request has succeeded. 
*/ +export interface GetClassifyResult200Response extends HttpResponse { + status: "200"; + body: AnalyzeResultOperationOutput; +} + +export interface GetClassifyResultDefaultResponse extends HttpResponse { + status: string; + body: ErrorResponseOutput; +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/analysis.spec.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/analysis.spec.ts new file mode 100644 index 000000000000..bf52868d1343 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/analysis.spec.ts @@ -0,0 +1,904 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { Recorder, assertEnvironmentVariable } from "@azure-tools/test-recorder"; +import { createRecorder, testPollingOptions } from "./utils/recorderUtils"; +import { Context } from "mocha"; +import DocumentIntelligence, { + AnalyzeResultOperationOutput, + DocumentBarcodeOutput, + DocumentIntelligenceClient, + DocumentModelBuildOperationDetailsOutput, + DocumentModelDetailsOutput, + DocumentTableOutput, + getLongRunningPoller, + isUnexpected, +} from "../../src"; +import { assert } from "chai"; +import { ASSET_PATH, getRandomNumber, makeTestUrl } from "./utils/utils"; +import path from "path"; +import fs from "fs"; + +describe("DocumentIntelligenceClient", () => { + let recorder: Recorder; + let client: DocumentIntelligenceClient; + beforeEach(async function (this: Context) { + recorder = await createRecorder(this); + await recorder.setMatcher("BodilessMatcher"); + client = DocumentIntelligence( + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_ENDPOINT"), + { key: assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_API_KEY") }, + recorder.configureClientOptions({}) + ); + }); + + afterEach(async function () { + await recorder.stop(); + }); + + describe("content analysis", () => { + it("pdf file stream", async () => { + const filePath = path.join(ASSET_PATH, "forms", 
"Invoice_1.pdf"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + assert.ok(pages && pages.length > 0, `Expected non-empty pages but got ${pages}`); + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + + const [table] = tables!; + assert.ok(table.boundingRegions?.[0]); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("png file stream", async () => { + const filePath = path.join(ASSET_PATH, "receipt", "contoso-receipt.png"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const paragraphs = analyzeResult?.paragraphs; + + assert.ok( + paragraphs && paragraphs.length > 0, + `Expected non-empty paragraphs but got ${paragraphs}.` + ); + + assert.ok(pages && pages.length > 0, `Expect no-empty pages but got ${pages}`); + }); + + it("jpeg 
file stream", async () => { + const filePath = path.join(ASSET_PATH, "forms", "Form_1.jpg"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + const [table] = tables as DocumentTableOutput[]; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("tiff file stream", async () => { + const filePath = path.join(ASSET_PATH, "forms", "Invoice_1.tiff"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + const [table] = tables as DocumentTableOutput[]; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("pdf file stream without 
passing content type", async () => { + const filePath = path.join(ASSET_PATH, "forms", "Invoice_1.pdf"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + const [table] = tables as DocumentTableOutput[]; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("url", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + + assert.isNotEmpty(pages); + + assert.isNotEmpty(tables); + const [table] = tables as DocumentTableOutput[]; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("with selection marks", async () => { + const filePath = path.join(ASSET_PATH, 
"forms", "selection_mark_form.pdf"); + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const pages = analyzeResult?.pages; + assert.equal(pages?.[0].pageNumber, 1); + assert.isNotEmpty(pages?.[0].selectionMarks); + }); + + it("invalid locale throws", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + + try { + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "thisIsNotAValidLanguage" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput; + assert.fail("Expected an exception due to invalid locale."); + } catch (ex: any) { + assert.ok((ex as Error).message.includes("Invalid argument.")); + } + }); + + it("specifying pages", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "en-IN", pages: "1" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + 
await (await poller).pollUntilDone(); + }); + + it("invalid pages throws", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + + try { + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "en-IN", pages: "2" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + await (await poller).pollUntilDone(); + assert.fail("Expected an exception due to invalid pages."); + } catch (ex: any) { + // Just make sure we didn't get a bad error message + assert.isFalse((ex as Error).message.includes("")); + } + }); + + it("barcode", async function () { + const url = makeTestUrl("/barcode2.tif"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-read") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { features: ["barcodes"] }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + + assert.isNotEmpty(pages); + + assert.isNotEmpty(pages?.[0].barcodes); + + const [barcode1, barcode2] = pages?.[0].barcodes as DocumentBarcodeOutput[]; + + assert.equal(barcode1.kind, "Code39"); + assert.equal(barcode1.value, "D589992-X"); + + assert.equal(barcode2.kind, "Code39"); + assert.equal(barcode2.value, "SYN121720213429"); + }); + + it("annotations", async function () { + const url = makeTestUrl("/annotations.jpg"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-layout") + .post({ + 
contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "en-IN" }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + + assert.isNotEmpty(pages); + }); + + it("formula", async function () { + const url = makeTestUrl("/formula1.jpg"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-read") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { features: ["formulas"] }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const pages = analyzeResult?.pages; + + assert.isNotEmpty(pages); + + assert.isNotEmpty(pages?.[0].formulas); + }); + }); + + describe("custom forms", () => { + let _model: DocumentModelDetailsOutput; + let modelName: string; + + // We only want to create the model once, but because of the recorder's + // precedence, we have to create it in a test, so one test will end up + // recording the entire creation and the other tests will still be able + // to use it. 
+ async function requireModel(): Promise<DocumentModelDetailsOutput> { + if (!_model) { + modelName = recorder.variable( + "customFormModelName", + `customFormModelName${getRandomNumber()}` + ); + + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelName, + azureBlobSource: { + containerUrl: assertEnvironmentVariable( + "DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL" + ), + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const response = ( + (await (await poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result; + if (!response) { + throw new Error("Expected a DocumentModelDetailsOutput response."); + } + _model = response; + assert.ok(_model.modelId); + } + + return _model; + } + + it("with selection marks", async () => { + const { modelId } = await requireModel(); + + const filePath = path.join(ASSET_PATH, "forms", "selection_mark_form.pdf"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client.path("/documentModels/{modelId}:analyze", modelId).post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const pages = analyzeResult?.pages; + assert.ok(documents); + assert.equal(documents?.[0].docType, `${modelName}:${modelName}`); + assert.ok(pages?.[0]); + + /* There should be a table in the response, but it isn't recognized (maybe because it's too small or sparse) + assert.isNotEmpty(tables); + const [table] = tables!; + 
assert.ok(table.boundingRegions?.[0].boundingBox); + assert.equal(table.boundingRegions?.[0].pageNumber, 1);*/ + + assert.equal(pages?.[0].pageNumber, 1); + assert.isNotEmpty(pages?.[0].selectionMarks); + }); + + it("png file stream", async () => { + const filePath = path.join(ASSET_PATH, "forms", "Invoice_1.pdf"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + assert.isNotEmpty(documents); + + assert.equal(documents?.[0].docType, "invoice"); + }); + }); + + describe("receipts", () => { + it("png file stream", async () => { + const filePath = path.join(ASSET_PATH, "receipt", "contoso-receipt.png"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + assert.isNotEmpty(documents); + + assert.equal(documents?.[0].docType, "receipt.retailMeal"); + }); + + it("jpeg file stream", async () => { + const filePath = path.join(ASSET_PATH, "receipt", "contoso-allinone.jpg"); + + 
const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + assert.isNotEmpty(documents); + + assert.equal(documents?.[0].docType, "receipt.retailMeal"); + }); + + it("url", async () => { + const url = makeTestUrl("/contoso-allinone.jpg"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + assert.isNotEmpty(documents); + + assert.equal(documents?.[0].docType, "receipt.retailMeal"); + }); + + it("specifying locale", async () => { + const url = makeTestUrl("/contoso-allinone.jpg"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "en-IN" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput; + 
}); + + it("invalid locale throws", async () => { + const url = makeTestUrl("/contoso-allinone.jpg"); + + try { + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-receipt") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "thisIsNotAValidLanguage" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput; + assert.fail("Expected an exception due to invalid locale."); + } catch (ex: any) { + assert.ok((ex as Error).message.includes("Invalid argument.")); + } + }); + }); + + describe("invoices", () => { + it("pdf file stream", async () => { + const filePath = path.join(ASSET_PATH, "invoice", "Invoice_1.pdf"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + + assert.isNotEmpty(documents); + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + const [table] = tables!; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("url", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", 
"prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + const documents = analyzeResult?.documents; + const pages = analyzeResult?.pages; + const tables = analyzeResult?.tables; + + assert.isNotEmpty(documents); + assert.isNotEmpty(pages); + assert.isNotEmpty(tables); + const [table] = tables!; + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + }); + + it("invalid locale throws", async () => { + const url = makeTestUrl("/Invoice_1.pdf"); + + try { + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-invoice") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { locale: "thisIsNotAValidLanguage" }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput; + assert.fail("Expected an exception due to invalid locale."); + } catch (ex: any) { + assert.ok((ex as Error).message.includes("Invalid argument.")); + } + }); + }); + + describe("identityDocuments", () => { + it("png file stream", async () => { + const filePath = path.join(ASSET_PATH, "identityDocument", "license.png"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-idDocument") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw 
initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const receipt = documents?.[0]; + + assert.isNotEmpty(documents); + + assert.equal(receipt?.docType, "idDocument.driverLicense"); + }); + + it("url", async () => { + const url = makeTestUrl("/license.jpg"); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-idDocument") + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const idDocument = documents?.[0]; + + assert.isNotEmpty(documents); + assert.equal(idDocument?.docType, "idDocument.driverLicense"); + }); + + it("invalid locale throws", async () => { + const url = makeTestUrl("/license.png"); + + try { + await client.path("/documentModels/{modelId}:analyze", "prebuilt-idDocument").post({ + contentType: "application/json", + body: { + urlSource: url, + }, + queryParameters: { + locale: "thisIsNotAValidLocaleString", + }, + }); + assert.fail("Expected an exception due to invalid locale."); + } catch (ex: any) { + // Just make sure we didn't get a bad error message + assert.isFalse((ex as Error).message.includes("")); + } + }); + }); + + describe("tax - US - w2", () => { + it("png file stream", async function (this: Mocha.Context) { + const filePath = path.join(ASSET_PATH, "w2", "w2-single.png"); + // + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = 
await client + .path("/documentModels/{modelId}:analyze", "prebuilt-tax.us.w2") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const w2Naive = documents?.[0]; + + assert.isNotEmpty(documents); + + assert.equal(w2Naive?.docType, "tax.us.w2"); + }); + }); + + describe("healthInsuranceCard - US", function () { + it("png file stream", async function (this: Mocha.Context) { + const filePath = path.join(ASSET_PATH, "healthInsuranceCard", "insurance.png"); + + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", "prebuilt-healthInsuranceCard.us") + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + + assert.isNotEmpty(documents); + }); + }); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/classifiers.spec.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/classifiers.spec.ts new file mode 100644 index 000000000000..78743a033d05 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/classifiers.spec.ts @@ -0,0 +1,179 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { Recorder, assertEnvironmentVariable } from "@azure-tools/test-recorder"; +import { createRecorder, testPollingOptions } from "./utils/recorderUtils"; +import { Context } from "mocha"; +import DocumentIntelligence, { + AnalyzeResultOperationOutput, + DocumentClassifierBuildOperationDetailsOutput, + DocumentClassifierDetailsOutput, + DocumentIntelligenceClient, + getLongRunningPoller, + isUnexpected, +} from "../../src"; +import { assert } from "chai"; +import { ASSET_PATH, getRandomNumber, makeTestUrl } from "./utils/utils"; +import path from "path"; +import fs from "fs"; + +const containerSasUrl = (): string => + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"); + +describe("classifiers", () => { + let recorder: Recorder; + let client: DocumentIntelligenceClient; + beforeEach(async function (this: Context) { + recorder = await createRecorder(this); + await recorder.setMatcher("BodilessMatcher"); + client = DocumentIntelligence( + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_ENDPOINT"), + { key: assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_API_KEY") }, + recorder.configureClientOptions({}) + ); + }); + + afterEach(async function () { + await recorder.stop(); + }); + + let _classifier: DocumentClassifierDetailsOutput; + let _classifierId: string; + + const customClassifierDescription = "Custom classifier description"; + + // We only want to create the model once, but because of the recorder's + // precedence, we have to create it in a test, so one test will end up + // recording the entire creation and the other tests will still be able + // to use it. 
+ async function requireClassifier(): Promise<DocumentClassifierDetailsOutput> { + if (!_classifier) { + _classifierId = recorder.variable( + "customClassifierId", + `customClassifier${getRandomNumber()}` + ); + + const initialResponse = await client.path("/documentClassifiers:build").post({ + body: { + classifierId: _classifierId, + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + bar: { + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }, + }, + queryParameters: { customClassifierDescription }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const response = ( + (await (await poller).pollUntilDone()).body as DocumentClassifierBuildOperationDetailsOutput + ).result; + if (!response) { + throw new Error("Expected a DocumentClassifierDetailsOutput response."); + } + _classifier = response; + + assert.ok(_classifier.classifierId); + } + + return _classifier; + } + + it("build classifier", async function (this: Context) { + const classifier = await requireClassifier(); + + assert.containsAllKeys(classifier.docTypes, ["foo", "bar"]); + assert.equal(classifier.classifierId, _classifierId); + assert.equal(classifier.description, customClassifierDescription); + }); + + it("analyze from PNG file stream", async function (this: Context) { + const filePath = path.join(ASSET_PATH, "forms", "Invoice_1.pdf"); + const { classifierId } = await requireClassifier(); + const base64Source = fs.readFileSync(filePath, { encoding: "base64" }); + + const initialResponse = await client + .path("/documentClassifiers/{classifierId}:analyze", classifierId) + .post({ + contentType: "application/json", + body: { + base64Source, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const 
analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + assert.isNotEmpty(analyzeResult?.documents); + assert.oneOf(analyzeResult?.documents![0].docType, ["foo", "bar"]); + + // Additionally check that the pages aren't empty and that there are some common fields set + assert.isNotEmpty(analyzeResult?.pages); + assert.ok(analyzeResult?.pages![0].pageNumber); + assert.isDefined(analyzeResult?.pages![0].angle); + assert.ok(analyzeResult?.pages![0].height); + assert.ok(analyzeResult?.pages![0].width); + assert.ok(analyzeResult?.pages![0].unit); + }); + + it("analyze from PNG file URL", async function (this: Context) { + const url = makeTestUrl("/Invoice_1.pdf"); + const { classifierId } = await requireClassifier(); + + const initialResponse = await client + .path("/documentClassifiers/{classifierId}:analyze", classifierId) + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + assert.isNotEmpty(analyzeResult?.documents); + assert.oneOf(analyzeResult?.documents![0].docType, ["foo", "bar"]); + }); + + it("get & delete classifiers from the account", async function () { + await client.path("/documentClassifiers/{classifierId}", _classifierId).get(); + + // Delete the custom classifier we created + if (_classifierId) { + await client.path("/documentClassifiers/{classifierId}", _classifierId).delete(); + } + + // Try to get the classifier and assert that it's gone + try { + await client.path("/documentClassifiers/{classifierId}", _classifierId).get(); + assert.fail("Expected error while accessing a deleted classifier"); + } catch (error: any) { + assert.ok(error); + } + }); +}); diff --git 
a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/documentIntelligence.spec.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/documentIntelligence.spec.ts new file mode 100644 index 000000000000..0b392e67070a --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/documentIntelligence.spec.ts @@ -0,0 +1,101 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { Recorder, assertEnvironmentVariable } from "@azure-tools/test-recorder"; +import { createTestCredential } from "@azure-tools/test-credential"; +import { createRecorder } from "./utils/recorderUtils"; +import { Context } from "mocha"; +import DocumentIntelligence, { + DocumentClassifierBuildOperationDetailsOutput, + DocumentIntelligenceClient, + getLongRunningPoller, + isUnexpected, +} from "../../src"; +import assert from "assert"; +import { getRandomNumber } from "./utils/utils"; +import { containerSasUrl } from "./utils/utils"; + +describe("DocumentIntelligenceClient", () => { + let recorder: Recorder; + let client: DocumentIntelligenceClient; + beforeEach(async function (this: Context) { + recorder = await createRecorder(this); + await recorder.setMatcher("BodilessMatcher"); + client = DocumentIntelligence( + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_ENDPOINT"), + { key: assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_API_KEY") }, + recorder.configureClientOptions({}) + ); + }); + + afterEach(async function () { + await recorder.stop(); + }); + + it("API Key works - getInfo", async function () { + const response = await client.path("/info").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + assert.strictEqual( + response.body.customDocumentModels.limit, + 20000, + "expected customDocumentModels limit should be 20000" + ); + }); + + it.skip("AAD works - getInfo", async function () { + client = DocumentIntelligence( + 
assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_ENDPOINT"), + createTestCredential(), + recorder.configureClientOptions({}) + ); + const response = await client.path("/info").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + assert.strictEqual( + response.body.customDocumentModels.limit, + 20000, + "expected customDocumentModels limit should be 20000" + ); + }); + + it("documentClassifiers build", async function () { + const initialResponse = await client.path("/documentClassifiers:build").post({ + body: { + classifierId: recorder.variable( + "customClassifierId", + `customClassifier${getRandomNumber()}` + ), + description: "Custom classifier description", + docTypes: { + foo: { + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + bar: { + // Adding source kind fails with 400 Invalid Argument + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const response = ( + (await (await poller).pollUntilDone()).body + ); + assert.strictEqual( + response.result?.classifierId, + recorder.variable("customClassifierId"), + "expected classifierId to match" + ); + }); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/training.spec.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/training.spec.ts new file mode 100644 index 000000000000..dc2b9d2fdf18 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/training.spec.ts @@ -0,0 +1,357 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { assert } from "chai"; +import { Recorder, assertEnvironmentVariable } from "@azure-tools/test-recorder"; +import { createRecorder, testPollingOptions } from "./utils/recorderUtils"; +import { Context } from "mocha"; +import DocumentIntelligence, { + AnalyzeResultOperationOutput, + ComponentDocumentModelDetails, + DocumentIntelligenceClient, + DocumentModelBuildOperationDetailsOutput, + DocumentModelComposeOperationDetailsOutput, + DocumentModelCopyToOperationDetailsOutput, + DocumentModelDetailsOutput, + getLongRunningPoller, + isUnexpected, + paginate, +} from "../../src"; +import { getRandomNumber } from "./utils/utils"; +import { containerSasUrl } from "./utils/utils"; + +describe("model management", () => { + let recorder: Recorder; + let client: DocumentIntelligenceClient; + beforeEach(async function (this: Context) { + recorder = await createRecorder(this); + client = DocumentIntelligence( + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_ENDPOINT"), + { key: assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_API_KEY") }, + recorder.configureClientOptions({}) + ); + }); + + afterEach(async function () { + await recorder.stop(); + }); + + // #region Model Training + + /* + * All test steps that are related to training and validating + * models from source documents are encapsulated in this + * "describe" block + */ + + describe("model build", async function () { + const allModels: string[] = []; + + let id = 0; + function getId(): number { + return (id += 1); + } + + describe(`custom model from trainingdata-v3`, async () => { + let _model: DocumentModelDetailsOutput; + + let modelId: string; + + // We only want to create the model once, but because of the recorder's + // precedence, we have to create it in a test, so one test will end up + // recording the entire creation and the other tests will still be able + // to use it + async function requireModel(): Promise { + if (!_model) { + // Compute a unique name for the model + modelId = 
recorder.variable(getId().toString(), `modelName${getRandomNumber()}`); + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const response = ( + (await (await poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result; + if (!response) { + throw new Error("Expected a DocumentModelDetailsOutput response."); + } + _model = response; + + assert.equal(_model.modelId, modelId); + + allModels.push(_model.modelId); + } + + return _model; + } + + /* + * Make sure the model training API returns correct information + * for the model. + */ + it("validate model training response", async () => { + const model = await requireModel(); + + assert.ok(model, "Expecting valid response"); + assert.ok(model.modelId); + + assert.isNotEmpty(model.docTypes); + const submodel = model.docTypes![model.modelId]; + + // When training with labels, we will have expectations for the names + assert.ok( + submodel.fieldSchema["Signature"], + "Expecting field with name 'Signature' to be valid" + ); + }); + + /* + * Use the model for some simple recognition + */ + describe("recognition", async () => { + it("form from url", async () => { + const model = await requireModel(); + const urlParts = containerSasUrl().split("?"); + const url = `${urlParts[0]}/Form_1.jpg?${urlParts[1]}`; + + const initialResponse = await client + .path("/documentModels/{modelId}:analyze", model.modelId) + .post({ + contentType: "application/json", + body: { + urlSource: url, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + + const poller = getLongRunningPoller(client, initialResponse, { ...testPollingOptions }); + const analyzeResult = ( + (await (await 
poller).pollUntilDone()).body as AnalyzeResultOperationOutput + ).analyzeResult; + + const documents = analyzeResult?.documents; + const tables = analyzeResult?.tables; + assert.isNotEmpty(documents); + const document = documents?.[0]; + + assert.isNotEmpty(document?.boundingRegions); + + assert.isNotEmpty(tables); + const [table] = tables!; + + assert.ok(table.boundingRegions?.[0].polygon); + assert.equal(table.boundingRegions?.[0].pageNumber, 1); + + assert.ok(document?.fields); + assert.ok(document?.fields?.["Merchant"]); + assert.ok(document?.fields?.["DatedAs"]); + assert.ok(document?.fields?.["CompanyPhoneNumber"]); + assert.ok(document?.fields?.["CompanyName"]); + assert.ok(document?.fields?.["Signature"]); + }); + }); + + it("getModel() verification", async () => { + const model = await requireModel(); + + const modelDetails = await client.path("/documentModels/{modelId}", model.modelId).get(); + + if (isUnexpected(modelDetails)) { + throw modelDetails.body.error; + } + + assert.strictEqual(modelDetails.body.modelId, model.modelId); + assert.strictEqual(modelDetails.body.description, model.description); + assert.ok(modelDetails.body.docTypes); + }); + }); + + /* + * These are tests that check that model querying functions as expected. + * This section also cleans up the models by deleting them. 
+ */ + describe("model information", async () => { + it("iterate models in account", async () => { + const response = await client.path("/documentModels").get(); + if (isUnexpected(response)) { + throw response.body.error; + } + + const modelsInAccount: string[] = []; + for await (const model of paginate(client, response)) { + assert.ok(model.modelId); + modelsInAccount.push(model.modelId); + } + + for (const modelId of allModels) { + assert.isTrue(modelsInAccount.includes(modelId)); + } + }); + + it("delete models from the account", async () => { + // Delete all of the models + await Promise.all( + allModels.map((modelId) => client.path("/documentModels/{modelId}", modelId).delete()) + ); + + await Promise.all( + allModels.map(async (modelId) => { + try { + const res = await client.path("/documentModels/{modelId}", modelId).get(); + if (isUnexpected(res)) { + throw res.body.error; + } + console.log(`Model ${res.body.modelId} was not deleted!`); + throw new Error( + `The service returned model info for ${modelId}, but we thought we had deleted it!` + ); + } catch (e: unknown) { + assert.isTrue((e as Error).message.endsWith(" not found.")); + } + }) + ); + }); + }); + }); + + // #endregion + + it(`compose model`, async function () { + // Helper function to train/validate single model + async function makeModel(prefix: string): Promise { + const modelId = recorder.variable(prefix, `${prefix}${getRandomNumber()}`); + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + const model = ( + (await (await poller).pollUntilDone()).body as DocumentModelBuildOperationDetailsOutput + ).result!; + + assert.equal(model.modelId, modelId); + assert.equal(model.modelId, modelId); + 
assert.ok(model.docTypes); + + return { modelId: model.modelId }; + } + + const componentModelIds = await Promise.all([makeModel("input1"), makeModel("input2")]); + + const modelId = recorder.variable("composedModelName", `composedModelName${getRandomNumber()}`); + const initialResponse = await client.path("/documentModels:compose").post({ + body: { + componentModels: componentModelIds, + modelId, + }, + }); + + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const poller = getLongRunningPoller(client, initialResponse); + + const composedModel = ( + (await (await poller).pollUntilDone()).body as DocumentModelComposeOperationDetailsOutput + ).result!; + assert.ok(composedModel.modelId); + assert.equal(composedModel.modelId, modelId); + assert.ok(composedModel.docTypes); + + // Submodels + assert.equal(Object.entries(composedModel.docTypes ?? {}).length, 2); + }); + + it(`copy model`, async function () { + // Since this test is isolated, we'll create a fresh set of resources for it + await recorder.addSanitizers( + { + bodyKeySanitizers: [ + { + jsonPath: "$.accessToken", + value: "access_token", + }, + ], + }, + ["playback", "record"] + ); + const modelId = recorder.variable("copySource", `copySource${getRandomNumber()}`); + + const initialResponse = await client.path("/documentModels:build").post({ + body: { + buildMode: "template", + modelId: modelId, + azureBlobSource: { + containerUrl: containerSasUrl(), + }, + }, + }); + if (isUnexpected(initialResponse)) { + throw initialResponse.body.error; + } + const trainingPoller = getLongRunningPoller(client, initialResponse); + const sourceModel = ( + (await (await trainingPoller).pollUntilDone()) + .body as DocumentModelBuildOperationDetailsOutput + ).result!; + + assert.equal(sourceModel.modelId, modelId); + + const targetModelId = recorder.variable("copyTarget", `copyTarget${getRandomNumber()}`); + const targetAuth = await client.path("/documentModels:authorizeCopy").post({ + body: { + 
modelId: targetModelId, + }, + }); + + if (isUnexpected(targetAuth)) { + throw targetAuth.body.error; + } + const copyInitResponse = await client + .path("/documentModels/{modelId}:copyTo", sourceModel.modelId) + .post({ + body: targetAuth.body, + }); + + if (isUnexpected(copyInitResponse)) { + throw copyInitResponse.body.error; + } + const copyPoller = getLongRunningPoller(client, copyInitResponse); + const copyResult = ( + (await (await copyPoller).pollUntilDone()).body as DocumentModelCopyToOperationDetailsOutput + ).result!; + + assert.ok(copyResult, "Expecting valid copy result"); + assert.equal(copyResult.modelId, targetAuth.body.targetModelId); + + assert.ok(copyResult.createdDateTime, "Expecting valid 'trainingStartedOn' property"); + + const targetModel = await client.path("/documentModels/{modelId}", copyResult.modelId).get(); + + if (isUnexpected(targetModel)) { + throw targetModel.body.error; + } + assert.equal(targetModel.body.modelId, targetAuth.body.targetModelId); + assert.equal(targetModel.body.modelId, copyResult.modelId); + }); +}); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/recorderUtils.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/recorderUtils.ts new file mode 100644 index 000000000000..66623c5910eb --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/recorderUtils.ts @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { Context } from "mocha"; +import { Recorder, RecorderStartOptions, env, isPlaybackMode } from "@azure-tools/test-recorder"; + +const envSetupForPlayback: { [k: string]: string } = { + AZURE_CLIENT_ID: "azure_client_id", + AZURE_CLIENT_SECRET: "azure_client_secret", + AZURE_TENANT_ID: "12345678-1234-1234-1234-123456789012", + DOCUMENT_INTELLIGENCE_API_KEY: "api_key", + DOCUMENT_INTELLIGENCE_ENDPOINT: "https://endpoint/", + DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL: + "https://storageaccount/trainingdata-v3?sastoken", + DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL: "https://storageaccount/testingdata?sastoken", + DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL: + "https://storageaccount/selectionmark-v3?sastoken", + DOCUMENT_INTELLIGENCE_TARGET_RESOURCE_REGION: "westus2", + // fake resource id + DOCUMENT_INTELLIGENCE_TARGET_RESOURCE_ID: + "/subscriptions/e1367d46-77d4-4f57-8cfe-348edbdc84a3/resourceGroups/jstests/providers/Microsoft.CognitiveServices/accounts/jstests-fr", +}; + +export const recorderOptions: RecorderStartOptions = { + envSetupForPlayback, + sanitizerOptions: { + generalSanitizers: [ + // endpoints + { + target: env["DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"]?.split("/")[2] || "", + value: + envSetupForPlayback["DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"].split("/")[2], + }, + { + target: env["DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL"]?.split("/")[2] || "", + value: envSetupForPlayback["DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL"].split("/")[2], + }, + { + target: + env["DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL"]?.split("/")[2] || + "", + value: + envSetupForPlayback[ + "DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL" + ].split("/")[2], + }, + // sas tokens + { + target: env["DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"]?.split("?")[1] || "", + value: + envSetupForPlayback["DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"].split("?")[1], + }, + { 
+ target: env["DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL"]?.split("?")[1] || "", + value: envSetupForPlayback["DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL"].split("?")[1], + }, + { + target: + env["DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL"]?.split("?")[1] || + "", + value: + envSetupForPlayback[ + "DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL" + ].split("?")[1], + }, + ], + }, +}; + +/** + * creates the recorder and reads the environment variables from the `.env` file. + * Should be called first in the test suite to make sure environment variables are + * read before they are being used. + */ +export async function createRecorder(context: Context): Promise { + const recorder = new Recorder(context.currentTest); + await recorder.start(recorderOptions); + return recorder; +} + +export const testPollingOptions = { + intervalInMs: isPlaybackMode() ? 0 : undefined, +}; diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/utils.ts b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/utils.ts new file mode 100644 index 000000000000..e57910da0dcd --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/test/public/utils/utils.ts @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import { assertEnvironmentVariable } from "@azure-tools/test-recorder"; +import { createClientLogger } from "@azure/logger"; + +import path from "path"; + +export const ASSET_PATH = path.resolve(path.join(process.cwd(), "assets")); + +export function makeTestUrl(urlPath: string): string { + const testingContainerUrl = assertEnvironmentVariable( + "DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL" + ); + const parts = testingContainerUrl.split("?"); + return `${parts[0]}${urlPath}?${parts[1]}`; +} + +export function getRandomNumber(): number { + return Math.ceil(Math.random() * 1000 + 10000); +} + +export const containerSasUrl = (): string => + assertEnvironmentVariable("DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL"); + +export const logger = createClientLogger("ai-form-recognizer:test"); diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/tests.yml b/sdk/documentintelligence/ai-document-intelligence-rest/tests.yml new file mode 100644 index 000000000000..805cdac774ee --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/tests.yml @@ -0,0 +1,22 @@ +# Parameterize Location to the test resources deployment script. This allows +# specifying a different Azure region to use when manually triggering the live +# tests pipeline. This is useful for when we want to test in different +# environments: Prod, Canary, etc. 
+parameters: + - name: Location + displayName: Location + type: string + default: eastus + +trigger: none + +stages: + - template: /eng/pipelines/templates/stages/archetype-sdk-tests.yml + parameters: + PackageName: "@azure-rest/ai-document-intelligence" + ServiceDirectory: documentintelligence + Location: "${{ parameters.Location }}" + EnvVars: + AZURE_CLIENT_ID: $(aad-azure-sdk-test-client-id) + AZURE_TENANT_ID: $(aad-azure-sdk-test-tenant-id) + AZURE_CLIENT_SECRET: $(aad-azure-sdk-test-client-secret) diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/tsconfig.json b/sdk/documentintelligence/ai-document-intelligence-rest/tsconfig.json new file mode 100644 index 000000000000..8c6804f2d864 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../../tsconfig.package", + "compilerOptions": { + "outDir": "./dist-esm", + "declarationDir": "./types", + "paths": { "@azure-rest/ai-document-intelligence": ["./src/index"] } + }, + "include": ["./src/**/*.ts", "./test/**/*.ts", "samples-dev/**/*.ts"] +} diff --git a/sdk/documentintelligence/ai-document-intelligence-rest/tsp-location.yaml b/sdk/documentintelligence/ai-document-intelligence-rest/tsp-location.yaml new file mode 100644 index 000000000000..ef9a8f9c4811 --- /dev/null +++ b/sdk/documentintelligence/ai-document-intelligence-rest/tsp-location.yaml @@ -0,0 +1,4 @@ +directory: specification/ai/DocumentIntelligence +commit: e237298f554742f54ab7e62c4a96080f972bcfa5 +repo: Azure/azure-rest-api-specs +additionalDirectories: \ No newline at end of file diff --git a/sdk/documentintelligence/ci.yml b/sdk/documentintelligence/ci.yml new file mode 100644 index 000000000000..e8c3407c193e --- /dev/null +++ b/sdk/documentintelligence/ci.yml @@ -0,0 +1,31 @@ +# NOTE: Please refer to https://aka.ms/azsdk/engsys/ci-yaml before editing this file. 
+ +trigger: + branches: + include: + - main + - release/* + - hotfix/* + paths: + include: + - sdk/documentintelligence/ +pr: + branches: + include: + - main + - feature/* + - release/* + - hotfix/* + exclude: + - feature/v4 + paths: + include: + - sdk/documentintelligence/ai-document-intelligence-rest/ + +extends: + template: /eng/pipelines/templates/stages/archetype-sdk-client.yml + parameters: + ServiceDirectory: documentintelligence + Artifacts: + - name: azure-rest-ai-document-intelligence + safeName: azurerestaidocumentintelligence diff --git a/sdk/documentintelligence/test-resources.json b/sdk/documentintelligence/test-resources.json new file mode 100644 index 000000000000..ccbb836b583d --- /dev/null +++ b/sdk/documentintelligence/test-resources.json @@ -0,0 +1,138 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "baseName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "The base resource name." + } + }, + "location": { + "type": "string", + "defaultValue": "eastus", + "metadata": { + "description": "The location of the resource. By default, this is the same as the resource group." + } + }, + "testApplicationOid": { + "type": "string", + "metadata": { + "description": "The AAD Client ID of the test application service principal." 
+ } + }, + "blobStorageAccount": { + "type": "string", + "defaultValue": "azuresdktrainingdata" + }, + "trainingDataContainer": { + "type": "string", + "defaultValue": "trainingdata-v3" + }, + "selectionMarkTrainingDataContainer": { + "type": "string", + "defaultValue": "selectionmark-v3" + }, + "blobResourceId": { + "type": "string", + "defaultValue": "[resourceId('2cd617ea-1866-46b1-90e3-fffb087ebf9b', 'TrainingData', 'Microsoft.Storage/storageAccounts', parameters('blobStorageAccount'))]" + }, + "trainingDataSasProperties": { + "type": "object", + "defaultValue": { + "canonicalizedResource": "[concat('/blob/', parameters('blobStorageAccount'), '/', parameters('trainingDataContainer'))]", + "signedExpiry": "[dateTimeAdd(utcNow('u'), 'P2M')]", + "signedPermission": "rl", + "signedResource": "c" + } + }, + "selectionMarkTrainingDataSasProperties": { + "type": "object", + "defaultValue": { + "canonicalizedResource": "[concat('/blob/', parameters('blobStorageAccount'), '/', parameters('selectionMarkTrainingDataContainer'))]", + "signedExpiry": "[dateTimeAdd(utcNow('u'), 'P2M')]", + "signedPermission": "rl", + "signedResource": "c" + } + }, + "testingDataContainer": { + "type": "string", + "defaultValue": "testingdata" + }, + "testingDataSasProperties": { + "type": "object", + "defaultValue": { + "canonicalizedResource": "[concat('/blob/', parameters('blobStorageAccount'), '/', parameters('testingDataContainer'))]", + "signedExpiry": "[dateTimeAdd(utcNow('u'), 'P2M')]", + "signedPermission": "rl", + "signedResource": "c" + } + }, + "cognitiveServicesEndpointSuffix": { + "type": "string", + "defaultValue": ".cognitiveservices.azure.com", + "metadata": { + "description": "Endpoint suffix for the Cognitive Services resource. 
Defaults to '.cognitiveservices.azure.com'" + } + } + }, + "variables": { + // "frRoleId": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/a97b65f3-24c7-4388-baec-2e87135dc908')]" + }, + "resources": [ + // { + // "type": "Microsoft.CognitiveServices/accounts/providers/roleAssignments", + // "apiVersion": "2018-09-01-preview", + // "name": "[format('{0}/Microsoft.Authorization/{1}', parameters('baseName'), guid(parameters('baseName')))]", + // "dependsOn": ["[resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName'))]"], + // "properties": { + // "principalId": "[parameters('testApplicationOid')]", + // "roleDefinitionId": "[variables('frRoleId')]" + // } + // }, + { + "type": "Microsoft.CognitiveServices/accounts", + "name": "[parameters('baseName')]", + "apiVersion": "2023-10-01-preview", + "sku": { + "name": "S0" + }, + "kind": "FormRecognizer", + "location": "[parameters('location')]", + "properties": { + "customSubDomainName": "[parameters('baseName')]" + } + } + ], + "outputs": { + "DOCUMENT_INTELLIGENCE_ENDPOINT": { + "type": "string", + "value": "[concat('https://', parameters('baseName'), parameters('cognitiveServicesEndpointSuffix'), '/')]" + }, + "DOCUMENT_INTELLIGENCE_API_KEY": { + "type": "string", + "value": "[listKeys(resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName')), '2023-10-01-preview').key1]" + }, + "DOCUMENT_INTELLIGENCE_TRAINING_CONTAINER_SAS_URL": { + "type": "string", + "value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('trainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('trainingDataSasProperties')).serviceSasToken)]" + }, + "DOCUMENT_INTELLIGENCE_TESTING_CONTAINER_SAS_URL": { + "type": "string", + "value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('testingDataContainer'), '?', 
listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('testingDataSasProperties')).serviceSasToken)]" + }, + "DOCUMENT_INTELLIGENCE_SELECTION_MARK_STORAGE_CONTAINER_SAS_URL": { + "type": "string", + "value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('selectionMarkTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('selectionMarkTrainingDataSasProperties')).serviceSasToken)]" + }, + "DOCUMENT_INTELLIGENCE_TARGET_RESOURCE_REGION": { + "type": "string", + "value": "[parameters('location')]" + }, + "DOCUMENT_INTELLIGENCE_TARGET_RESOURCE_ID": { + "type": "string", + "value": "[resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName'))]" + } + } +}