Skip to content

Commit

Permalink
[Language Text] Mitigating bug in auto detected language for TA4H (#2…
Browse files Browse the repository at this point in the history
…3720)

Adds a swagger transform that declares `detectedLanguage` in healthcare document results as a string, then deserializes that string in the convenience layer so that the ISO 639-1 name (`iso6391Name`) is set correctly.
  • Loading branch information
deyaaeldeen authored Nov 4, 2022
1 parent 77edd2d commit 5c6bff2
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 31 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 32 additions & 15 deletions sdk/cognitivelanguage/ai-language-text/src/transforms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import {
PiiResult as GeneratedPiiEntityRecognitionResult,
SentenceSentiment as GeneratedSentenceSentiment,
SentimentResponse as GeneratedSentimentAnalysisResult,
HealthcareEntitiesDocumentResult,
HealthcareLROResult,
HealthcareRelation,
HealthcareRelationEntity,
Expand All @@ -50,6 +49,8 @@ import {
CustomEntitiesResultDocumentsItem,
ExtractedSummaryDocumentResultWithDetectedLanguage,
AbstractiveSummaryDocumentResultWithDetectedLanguage,
HealthcareResultDocumentsItem,
DetectedLanguage,
} from "./generated";
import {
AnalyzeActionName,
Expand All @@ -74,6 +75,7 @@ import {
TextAnalysisError,
TextAnalysisErrorResult,
TextAnalysisSuccessResult,
WithDetectedLanguage,
} from "./models";
import {
AssessmentIndex,
Expand Down Expand Up @@ -363,6 +365,18 @@ export async function throwError<T>(p: Promise<T>): Promise<T> {
}
}

/**
 * Converts the raw `detectedLanguage` string into a `DetectedLanguage` object.
 *
 * The input is handled in one of two ways:
 * - If it is the JSON serialization of an object, the parsed object is returned.
 * - Otherwise (not valid JSON, or valid JSON that is not a plain object, such as
 *   `null`, a number, a boolean, an array, or a quoted string) the input is
 *   treated as a bare language code and returned as `{ iso6391Name: input }`.
 *
 * @param input - the raw `detectedLanguage` value, either a bare language code
 *   (e.g. `"en"`) or a JSON-serialized object.
 * @returns the deserialized language object, or an object carrying only
 *   `iso6391Name` when the input is not a serialized object.
 */
export function deserializeDetectedLanguage(input: string): DetectedLanguage {
  let parsed: unknown;
  try {
    parsed = JSON.parse(input);
  } catch (e) {
    // Not JSON at all (e.g. the bare code `en`): fall through to the fallback below.
    parsed = undefined;
  }
  // JSON.parse also succeeds on primitives, `null` and arrays; none of those is
  // a usable language object, so only a plain object is accepted as-is.
  if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
    return parsed as DetectedLanguage;
  }
  // Only the ISO name is known in this case; the remaining fields are left unset.
  return { iso6391Name: input } as DetectedLanguage;
}

function toHealthcareResult(
docIds: string[],
results: GeneratedHealthcareResult
Expand Down Expand Up @@ -392,20 +406,23 @@ function toHealthcareResult(
),
});
}
return transformDocumentResults<HealthcareEntitiesDocumentResult, HealthcareSuccessResult>(
docIds,
results,
{
processSuccess: ({ entities, relations, ...rest }) => {
const newEntities = entities.map(makeHealthcareEntity);
return {
entities: newEntities,
entityRelations: relations.map(makeHealthcareRelation(newEntities)),
...rest,
};
},
}
);
return transformDocumentResults<
HealthcareResultDocumentsItem,
WithDetectedLanguage<HealthcareSuccessResult>
>(docIds, results, {
processSuccess: ({ entities, relations, detectedLanguage, ...rest }) => {
const newEntities = entities.map(makeHealthcareEntity);
return {
entities: newEntities,
entityRelations: relations.map(makeHealthcareRelation(newEntities)),
// FIXME: remove this mitigation when the API fixes the representation on their end
...(detectedLanguage
? { detectedLanguage: deserializeDetectedLanguage(detectedLanguage) }
: {}),
...rest,
};
},
});
}

function toCustomSingleLabelClassificationResult(
Expand Down
14 changes: 14 additions & 0 deletions sdk/cognitivelanguage/ai-language-text/swagger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,20 @@ directive:
where: $.definitions.JobState
transform: $.properties.lastUpdatedDateTime["x-ms-client-name"] = "modifiedOn";

- from: swagger-document
where: $.definitions
transform: >
if (!$.DocumentDetectedLanguageForHealthcare) {
$.DocumentDetectedLanguageForHealthcare = { "type": "object", "properties": { "detectedLanguage": { "type": "string" } } };
}
- from: swagger-document
where: $.definitions.HealthcareResult.properties.documents.items.allOf
transform: >
if ($[1]["$ref"] === "#/definitions/DocumentDetectedLanguage") {
$[1]["$ref"] = "#/definitions/DocumentDetectedLanguageForHealthcare";
}
# Enhance documentation strings for some exported swagger types

- from: swagger-document
Expand Down
22 changes: 22 additions & 0 deletions sdk/cognitivelanguage/ai-language-text/test/internal/utils.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import { assert } from "@azure/test-utils";
import { deserializeDetectedLanguage } from "../../src/transforms";

// Unit tests for the helpers exported from the transforms module.
describe("Utilities", function () {
  describe("deserializeDetectedLanguage", function () {
    // A bare language code is wrapped into an object carrying only the ISO name.
    it("deserializes string", function () {
      const actual = deserializeDetectedLanguage("en");
      const expected = { iso6391Name: "en" } as any;
      assert.deepEqual(actual, expected);
    });

    // A JSON-serialized language object round-trips unchanged.
    it("deserializes object", function () {
      const expected = { name: "English", iso6391Name: "en", confidenceScore: 0.98 };
      const serialized = JSON.stringify(expected);
      const actual = deserializeDetectedLanguage(serialized);
      assert.deepEqual(actual, expected);
    });
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -8696,7 +8696,7 @@ export const expectation71: any = [
entityRelations: [],
id: "0",
warnings: [],
detectedLanguage: { "0": "e", "1": "n" },
detectedLanguage: { iso6391Name: "en" },
isLanguageDefaulted: false,
},
{
Expand Down

0 comments on commit 5c6bff2

Please sign in to comment.