diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md index d11798ae878f..fa522e9c5bf0 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md +++ b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md @@ -6,6 +6,7 @@ - We are now targeting the service's v3.1-preview.1 API as the default. If you would like to still use version v3.0 of the service, pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient - We have added an API `recognize_pii_entities` which returns entities containing personal information for a batch of documents. Only available for API version v3.1-preview.1 and up. + - In API version v3.1-preview.2 and up, the redacted text of the document is returned on the top-level result object `RecognizePiiEntitiesResult` through property `redacted_text`. - Added `offset` and `length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. These properties are only available for API versions v3.1-preview.1 and up. - `length` is the number of characters in the text of these models - `offset` is the offset of the text from the start of the document diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index 339d70df7d6f..8e371beb1361 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -146,6 +146,8 @@ class RecognizePiiEntitiesResult(DictMixin): :ivar entities: Recognized PII entities in the document. :vartype entities: list[~azure.ai.textanalytics.PiiEntity] + :ivar str redacted_text: Returns the text of the input document with all of the PII information + redacted out. Only returned for API versions v3.1-preview.2 and up. :ivar warnings: Warnings encountered while processing document. Results will still be returned if there are warnings, but they may not be fully accurate. :vartype warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning] @@ -155,18 +157,28 @@ class RecognizePiiEntitiesResult(DictMixin): ~azure.ai.textanalytics.TextDocumentStatistics :ivar bool is_error: Boolean check for error item when iterating over list of results. Always False for an instance of a RecognizePiiEntitiesResult. + .. versionadded:: v3.1-preview.2 + The *redacted_text* parameter. """ def __init__(self, **kwargs): self.id = kwargs.get("id", None) self.entities = kwargs.get("entities", None) + self.redacted_text = kwargs.get("redacted_text", None) self.warnings = kwargs.get("warnings", []) self.statistics = kwargs.get("statistics", None) self.is_error = False def __repr__(self): - return "RecognizePiiEntitiesResult(id={}, entities={}, warnings={}, statistics={}, is_error={})" \ - .format(self.id, repr(self.entities), repr(self.warnings), repr(self.statistics), self.is_error)[:1024] + return "RecognizePiiEntitiesResult(id={}, entities={}, redacted_text={}, warnings={}, " \ + "statistics={}, is_error={})" .format( + self.id, + repr(self.entities), + self.redacted_text, + repr(self.warnings), + repr(self.statistics), + self.is_error + )[:1024] class DetectLanguageResult(DictMixin): @@ -214,9 +226,9 @@ class CategorizedEntity(DictMixin): :ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc :vartype subcategory: str :ivar int offset: The entity text offset from the start of the document. - Returned in unicode code points. Only returned for api versions v3.1-preview.1 and up. + Returned in unicode code points. Only returned for API versions v3.1-preview.1 and up. :ivar int length: The length of the entity text. Returned - in unicode code points. Only returned for api versions v3.1-preview.1 and up. + in unicode code points. Only returned for API versions v3.1-preview.1 and up. :ivar confidence_score: Confidence score between 0 and 1 of the extracted entity. :vartype confidence_score: float @@ -669,9 +681,9 @@ class LinkedEntityMatch(DictMixin): :vartype confidence_score: float :ivar text: Entity text as appears in the request. :ivar int offset: The linked entity match text offset from the start of the document. - Returned in unicode code points. Only returned for api versions v3.1-preview.1 and up. + Returned in unicode code points. Only returned for API versions v3.1-preview.1 and up. :ivar int length: The length of the linked entity match text. Returned - in unicode code points. Only returned for api versions v3.1-preview.1 and up. + in unicode code points. Only returned for API versions v3.1-preview.1 and up. :vartype text: str """ @@ -781,9 +793,9 @@ class SentenceSentiment(DictMixin): :vartype confidence_scores: ~azure.ai.textanalytics.SentimentConfidenceScores :ivar int offset: The sentence offset from the start of the document. Returned - in unicode code points. Only returned for api versions v3.1-preview.1 and up. + in unicode code points. Only returned for API versions v3.1-preview.1 and up. :ivar int length: The length of the sentence. Returned - in unicode code points. Only returned for api versions v3.1-preview.1 and up. + in unicode code points. Only returned for API versions v3.1-preview.1 and up. :ivar mined_opinions: The list of opinions mined from this sentence. For example in "The food is good, but the service is bad", we would mind these two opinions "food is good", "service is bad". Only returned diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py index b07421d7847b..559fbf17cc5f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py @@ -134,6 +134,7 @@ def pii_entities_result(entity, results): # pylint: disable=unused-argument return RecognizePiiEntitiesResult( id=entity.id, entities=[PiiEntity._from_generated(e) for e in entity.entities], # pylint: disable=protected-access + redacted_text=entity.redacted_text if hasattr(entity, "redacted_text") else None, warnings=[TextAnalyticsWarning._from_generated(w) for w in entity.warnings], # pylint: disable=protected-access statistics=TextDocumentStatistics._from_generated(entity.statistics), # pylint: disable=protected-access ) diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text.yaml new file mode 100644 index 000000000000..da525c49fea0 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text.yaml @@ -0,0 +1,44 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "My SSN is 859-98-0987.", "language": + "en"}]}' + headers: + Accept: + - application/json, text/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '80' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"redactedText":"My SSN is ***********.","id":"0","entities":[{"text":"859-98-0987","category":"U.S. + Social Security Number (SSN)","offset":10,"length":11,"confidenceScore":0.65}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: + - c5ba8c84-0e46-471a-b4c8-f02c411c20ec + content-type: + - application/json; charset=utf-8 + csp-billing-usage: + - CognitiveServices.TextAnalytics.BatchScoring=1 + date: + - Mon, 31 Aug 2020 20:15:43 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '78' + status: + code: 200 + message: OK +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text_v3_1_preview_1.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text_v3_1_preview_1.yaml new file mode 100644 index 000000000000..621c6af4efd5 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_redacted_text_v3_1_preview_1.yaml @@ -0,0 +1,44 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "My SSN is 859-98-0987.", "language": + "en"}]}' + headers: + Accept: + - application/json, text/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '80' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"text":"859-98-0987","category":"U.S. + Social Security Number (SSN)","offset":10,"length":11,"confidenceScore":0.65}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: + - 4ae026d1-15d1-4d77-8913-46922e72d7cb + content-type: + - application/json; charset=utf-8 + csp-billing-usage: + - CognitiveServices.TextAnalytics.BatchScoring=1 + date: + - Mon, 31 Aug 2020 19:58:17 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '68' + status: + code: 200 + message: OK +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text.yaml new file mode 100644 index 000000000000..df78eca47113 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text.yaml @@ -0,0 +1,33 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "My SSN is 859-98-0987.", "language": + "en"}]}' + headers: + Accept: + - application/json, text/json + Content-Length: + - '80' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"redactedText":"My SSN is ***********.","id":"0","entities":[{"text":"859-98-0987","category":"U.S. + Social Security Number (SSN)","offset":10,"length":11,"confidenceScore":0.65}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: dc638432-dc71-4f52-aadb-829c2dfd1935 + content-type: application/json; charset=utf-8 + csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1 + date: Mon, 31 Aug 2020 20:15:43 GMT + strict-transport-security: max-age=31536000; includeSubDomains; preload + transfer-encoding: chunked + x-content-type-options: nosniff + x-envoy-upstream-service-time: '80' + status: + code: 200 + message: OK + url: https://cognitiveusw2dev.azure-api.net//text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text_v3_1_preview_1.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text_v3_1_preview_1.yaml new file mode 100644 index 000000000000..55b101e7b851 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_redacted_text_v3_1_preview_1.yaml @@ -0,0 +1,33 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "My SSN is 859-98-0987.", "language": + "en"}]}' + headers: + Accept: + - application/json, text/json + Content-Length: + - '80' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"text":"859-98-0987","category":"U.S. + Social Security Number (SSN)","offset":10,"length":11,"confidenceScore":0.65}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: eeda4dd4-74dd-4e54-88cb-5a0352f065cf + content-type: application/json; charset=utf-8 + csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1 + date: Mon, 31 Aug 2020 19:58:17 GMT + strict-transport-security: max-age=31536000; includeSubDomains; preload + transfer-encoding: chunked + x-content-type-options: nosniff + x-envoy-upstream-service-time: '106' + status: + code: 200 + message: OK + url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&stringIndexType=UnicodeCodePoint +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py index fe12a81cf1d7..fbafdb0e446f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py @@ -3,7 +3,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ - +import os import pytest import platform import functools @@ -576,6 +576,24 @@ def test_recognize_pii_entities_v3(self, client): assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value) + # currently only have this as playback since the dev endpoint is unreliable + @pytest.mark.playback_test_only + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer(client_kwargs={ + "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2, + "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'), + "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/" + }) + def test_redacted_text(self, client): + result = client.recognize_pii_entities(["My SSN is 859-98-0987."]) + self.assertEqual("My SSN is ***********.", result[0].redacted_text) + + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer() + def test_redacted_text_v3_1_preview_1(self, client): + result = client.recognize_pii_entities(["My SSN is 859-98-0987."]) + self.assertIsNone(result[0].redacted_text) + @GlobalTextAnalyticsAccountPreparer() @TextAnalyticsClientPreparer() def test_phi_domain_filter(self, client): diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py index ac673e423159..863504c2ebbc 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py @@ -3,7 +3,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ - +import os import pytest import platform import functools @@ -574,6 +574,24 @@ async def test_recognize_pii_entities_v3(self, client): assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value) + # currently only have this as playback since the dev endpoint is unreliable + @pytest.mark.playback_test_only + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer(client_kwargs={ + "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2, + "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'), + "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/" + }) + async def test_redacted_text(self, client): + result = await client.recognize_pii_entities(["My SSN is 859-98-0987."]) + self.assertEqual("My SSN is ***********.", result[0].redacted_text) + + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer() + async def test_redacted_text_v3_1_preview_1(self, client): + result = await client.recognize_pii_entities(["My SSN is 859-98-0987."]) + self.assertIsNone(result[0].redacted_text) + @GlobalTextAnalyticsAccountPreparer() @TextAnalyticsClientPreparer() async def test_phi_domain_filter(self, client): diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py index 8d0c1463f16e..cbe1b895ff3a 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py @@ -290,11 +290,13 @@ def test_recognize_pii_entities_result(self, pii_entity, text_analytics_warning, model = _models.RecognizePiiEntitiesResult( id="1", entities=[pii_entity[0]], + redacted_text="***********", warnings=[text_analytics_warning[0]], statistics=text_document_statistics[0], is_error=False ) - model_repr = "RecognizePiiEntitiesResult(id=1, entities=[{}], warnings=[{}], statistics={}, is_error=False)".format( + model_repr = "RecognizePiiEntitiesResult(id=1, entities=[{}], redacted_text=***********, warnings=[{}], " \ + "statistics={}, is_error=False)".format( pii_entity[1], text_analytics_warning[1], text_document_statistics[1] )