[text analytics] PII updates for v5.1.0b6 (#17038)

Azure · Mar 5, 2021 · b6d2979 · b6d2979
1 parent 8265479
commit b6d2979
Show file tree

Hide file tree

Showing 20 changed files with 370 additions and 71 deletions.
diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
@@ -10,6 +10,8 @@
 
 **New Features**
 
+- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
+- Added enum `PiiEntityCategoryType`.
 - Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
 exists on the `HealthcareEntity`
 - Add property `assertion` onto `HealthcareEntity`. This contains assertions about the entity itself, i.e. if the entity represents a diagnosis,

diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py
@@ -46,7 +46,8 @@
 )
 from ._paging import AnalyzeHealthcareEntitiesResult
 from ._generated.v3_1_preview_4.models import (
-    RelationType as HealthcareEntityRelationType,
+    PiiCategory as PiiEntityCategoryType,
+    RelationType as HealthcareEntityRelationType
 )
 
 __all__ = [
@@ -87,6 +88,7 @@
     'RequestStatistics',
     'AnalyzeBatchActionsType',
     "AnalyzeBatchActionsError",
+    "PiiEntityCategoryType",
     "HealthcareEntityRelationType",
     "HealthcareEntityRelationRoleType",
 ]

diff --git a/...xtanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/_operations_mixin.py b/...xtanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/_operations_mixin.py
@@ -143,7 +143,7 @@ def begin_health(
         self,
         documents,  # type: List["_models.MultiLanguageInput"]
         model_version=None,  # type: Optional[str]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         """Submit healthcare analysis job.

diff --git a/...alytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/aio/_operations_mixin.py b/...alytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/aio/_operations_mixin.py
@@ -139,7 +139,7 @@ async def begin_health(
         self,
         documents: List["_models.MultiLanguageInput"],
         model_version: Optional[str] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
         """Submit healthcare analysis job.

diff --git a/...cs/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/_metadata.json b/...cs/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/_metadata.json
diff --git a/...xtanalytics/_generated/v3_1_preview_4/aio/operations/_text_analytics_client_operations.py b/...xtanalytics/_generated/v3_1_preview_4/aio/operations/_text_analytics_client_operations.py
@@ -409,7 +409,7 @@ async def _health_initial(
         self,
         documents: List["_models.MultiLanguageInput"],
         model_version: Optional[str] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> Optional["_models.HealthcareJobState"]:
         cls = kwargs.pop('cls', None)  # type: ClsType[Optional["_models.HealthcareJobState"]]
@@ -474,7 +474,7 @@ async def begin_health(
         self,
         documents: List["_models.MultiLanguageInput"],
         model_version: Optional[str] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
         """Submit healthcare analysis job.
@@ -550,7 +550,7 @@ async def entities_recognition_general(
         documents: List["_models.MultiLanguageInput"],
         model_version: Optional[str] = None,
         show_stats: Optional[bool] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> "_models.EntitiesResult":
         """Named Entity Recognition.
@@ -637,7 +637,7 @@ async def entities_recognition_pii(
         model_version: Optional[str] = None,
         show_stats: Optional[bool] = None,
         domain: Optional[str] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         pii_categories: Optional[List[Union[str, "_models.PiiCategory"]]] = None,
         **kwargs
     ) -> "_models.PiiResult":
@@ -734,7 +734,7 @@ async def entities_linking(
         documents: List["_models.MultiLanguageInput"],
         model_version: Optional[str] = None,
         show_stats: Optional[bool] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> "_models.EntityLinkingResult":
         """Linked entities from a well known knowledge base.
@@ -977,7 +977,7 @@ async def sentiment(
         model_version: Optional[str] = None,
         show_stats: Optional[bool] = None,
         opinion_mining: Optional[bool] = None,
-        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
         **kwargs
     ) -> "_models.SentimentResponse":
         """Sentiment.

diff --git a/...zure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/__init__.py b/...zure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/__init__.py
@@ -160,7 +160,6 @@
     SentenceSentimentValue,
     State,
     StringIndexType,
-    StringIndexTypeResponse,
     TargetRelationType,
     TokenSentimentValue,
     WarningCodeValue,
@@ -248,7 +247,6 @@
     'SentenceSentimentValue',
     'State',
     'StringIndexType',
-    'StringIndexTypeResponse',
     'TargetRelationType',
     'TokenSentimentValue',
     'WarningCodeValue',

diff --git a/...azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models.py b/...azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models.py
@@ -801,9 +801,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
     :param model_version:
     :type model_version: str
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _attribute_map = {
@@ -817,7 +816,7 @@ def __init__(
     ):
         super(EntitiesTaskParameters, self).__init__(**kwargs)
         self.model_version = kwargs.get('model_version', "latest")
-        self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
+        self.string_index_type = kwargs.get('string_index_type', None)
 
 
 class Entity(msrest.serialization.Model):
@@ -936,9 +935,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
     :param model_version:
     :type model_version: str
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _attribute_map = {
@@ -952,7 +950,7 @@ def __init__(
     ):
         super(EntityLinkingTaskParameters, self).__init__(**kwargs)
         self.model_version = kwargs.get('model_version', "latest")
-        self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
+        self.string_index_type = kwargs.get('string_index_type', None)
 
 
 class ErrorResponse(msrest.serialization.Model):
@@ -1792,9 +1790,8 @@ class PiiTaskParameters(msrest.serialization.Model):
     :param pii_categories: (Optional) describes the PII categories to return.
     :type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _validation = {
@@ -1804,7 +1801,7 @@ class PiiTaskParameters(msrest.serialization.Model):
     _attribute_map = {
         'domain': {'key': 'domain', 'type': 'str'},
         'model_version': {'key': 'model-version', 'type': 'str'},
-        'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
+        'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
         'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
     }
 
@@ -1816,7 +1813,7 @@ def __init__(
         self.domain = kwargs.get('domain', "none")
         self.model_version = kwargs.get('model_version', "latest")
         self.pii_categories = kwargs.get('pii_categories', None)
-        self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
+        self.string_index_type = kwargs.get('string_index_type', None)
 
 
 class RequestStatistics(msrest.serialization.Model):

diff --git a/...e-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models_py3.py b/...e-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models_py3.py
@@ -900,9 +900,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
     :param model_version:
     :type model_version: str
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _attribute_map = {
@@ -914,7 +913,7 @@ def __init__(
         self,
         *,
         model_version: Optional[str] = "latest",
-        string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "StringIndexType"]] = None,
         **kwargs
     ):
         super(EntitiesTaskParameters, self).__init__(**kwargs)
@@ -1052,9 +1051,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
     :param model_version:
     :type model_version: str
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _attribute_map = {
@@ -1066,7 +1064,7 @@ def __init__(
         self,
         *,
         model_version: Optional[str] = "latest",
-        string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "StringIndexType"]] = None,
         **kwargs
     ):
         super(EntityLinkingTaskParameters, self).__init__(**kwargs)
@@ -2013,9 +2011,8 @@ class PiiTaskParameters(msrest.serialization.Model):
     :param pii_categories: (Optional) describes the PII categories to return.
     :type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
     :param string_index_type:  Possible values include: "TextElements_v8", "UnicodeCodePoint",
-     "Utf16CodeUnit". Default value: "TextElements_v8".
-    :type string_index_type: str or
-     ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
+     "Utf16CodeUnit".
+    :type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
     """
 
     _validation = {
@@ -2025,7 +2022,7 @@ class PiiTaskParameters(msrest.serialization.Model):
     _attribute_map = {
         'domain': {'key': 'domain', 'type': 'str'},
         'model_version': {'key': 'model-version', 'type': 'str'},
-        'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
+        'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
         'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
     }
 
@@ -2035,7 +2032,7 @@ def __init__(
         domain: Optional[Union[str, "PiiTaskParametersDomain"]] = "none",
         model_version: Optional[str] = "latest",
         pii_categories: Optional[List[Union[str, "PiiCategory"]]] = None,
-        string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
+        string_index_type: Optional[Union[str, "StringIndexType"]] = None,
         **kwargs
     ):
         super(PiiTaskParameters, self).__init__(**kwargs)

diff --git a/...s/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_text_analytics_client_enums.py b/...s/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_text_analytics_client_enums.py
@@ -322,19 +322,6 @@ class StringIndexType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
     #: application is written in a language that support Unicode, for example Java, JavaScript.
     UTF16_CODE_UNIT = "Utf16CodeUnit"
 
-class StringIndexTypeResponse(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
-
-    #: Returned offset and length values will correspond to TextElements (Graphemes and Grapheme
-    #: clusters) confirming to the Unicode 8.0.0 standard. Use this option if your application is
-    #: written in .Net Framework or .Net Core and you will be using StringInfo.
-    TEXT_ELEMENTS_V8 = "TextElements_v8"
-    #: Returned offset and length values will correspond to Unicode code points. Use this option if
-    #: your application is written in a language that support Unicode, for example Python.
-    UNICODE_CODE_POINT = "UnicodeCodePoint"
-    #: Returned offset and length values will correspond to UTF-16 code units. Use this option if your
-    #: application is written in a language that support Unicode, for example Java, JavaScript.
-    UTF16_CODE_UNIT = "Utf16CodeUnit"
-
 class TargetRelationType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
     """The type related to the target.
     """

diff --git a/...i/textanalytics/_generated/v3_1_preview_4/operations/_text_analytics_client_operations.py b/...i/textanalytics/_generated/v3_1_preview_4/operations/_text_analytics_client_operations.py
@@ -419,7 +419,7 @@ def _health_initial(
         self,
         documents,  # type: List["_models.MultiLanguageInput"]
         model_version=None,  # type: Optional[str]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         # type: (...) -> Optional["_models.HealthcareJobState"]
@@ -485,7 +485,7 @@ def begin_health(
         self,
         documents,  # type: List["_models.MultiLanguageInput"]
         model_version=None,  # type: Optional[str]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         # type: (...) -> AnalyzeHealthcareEntitiesLROPoller["_models.HealthcareJobState"]
@@ -562,7 +562,7 @@ def entities_recognition_general(
         documents,  # type: List["_models.MultiLanguageInput"]
         model_version=None,  # type: Optional[str]
         show_stats=None,  # type: Optional[bool]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         # type: (...) -> "_models.EntitiesResult"
@@ -650,7 +650,7 @@ def entities_recognition_pii(
         model_version=None,  # type: Optional[str]
         show_stats=None,  # type: Optional[bool]
         domain=None,  # type: Optional[str]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         pii_categories=None,  # type: Optional[List[Union[str, "_models.PiiCategory"]]]
         **kwargs  # type: Any
     ):
@@ -748,7 +748,7 @@ def entities_linking(
         documents,  # type: List["_models.MultiLanguageInput"]
         model_version=None,  # type: Optional[str]
         show_stats=None,  # type: Optional[bool]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         # type: (...) -> "_models.EntityLinkingResult"
@@ -994,7 +994,7 @@ def sentiment(
         model_version=None,  # type: Optional[str]
         show_stats=None,  # type: Optional[bool]
         opinion_mining=None,  # type: Optional[bool]
-        string_index_type="TextElements_v8",  # type: Optional[Union[str, "_models.StringIndexType"]]
+        string_index_type=None,  # type: Optional[Union[str, "_models.StringIndexType"]]
         **kwargs  # type: Any
     ):
         # type: (...) -> "_models.SentimentResponse"

diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py
@@ -1485,8 +1485,11 @@ def __init__(self, **kwargs):
         self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
 
     def __repr__(self, **kwargs):
-        return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})" \
-            .format(self.model_version, self.domain_filter, self.string_index_type)[:1024]
+        # Debug representation of this action; the result is capped at 1024 characters.
+        # The closing ")" belongs inside the format string so the repr stays balanced.
+        return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})".format(
+            self.model_version, self.domain_filter, self.string_index_type
+        )[:1024]
 
     def to_generated(self):
         return _latest_preview_models.PiiTask(

diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py
@@ -18,7 +18,11 @@
 from azure.core.tracing.decorator import distributed_trace
 from azure.core.exceptions import HttpResponseError
 from ._base_client import TextAnalyticsClientBase
-from ._request_handlers import _validate_input, _determine_action_type, _check_string_index_type_arg
+from ._request_handlers import (
+    _validate_input,
+    _determine_action_type,
+    _check_string_index_type_arg
+)
 from ._response_handlers import (
     process_http_response_error,
     entities_result,
@@ -301,6 +305,11 @@ def recognize_pii_entities(  # type: ignore
             I.e., if set to 'phi', will only return entities in the Protected Healthcare Information domain.
             See https://aka.ms/tanerpii for more information.
         :paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
+        :keyword categories_filter: Instead of recognizing all PII entity categories, you can pass in a list of
+            the specific PII entity categories you want returned. For example, if you only want
+            U.S. social security numbers in a document to be returned, you can pass in
+            `[PiiEntityCategoryType.US_SOCIAL_SECURITY_NUMBER]` for this kwarg.
+        :paramtype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
         :keyword str string_index_type: Specifies the method used to interpret string offsets.
             `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
             you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
@@ -327,6 +336,7 @@ def recognize_pii_entities(  # type: ignore
         model_version = kwargs.pop("model_version", None)
         show_stats = kwargs.pop("show_stats", False)
         domain_filter = kwargs.pop("domain_filter", None)
+        categories_filter = kwargs.pop("categories_filter", None)
 
         string_index_type = _check_string_index_type_arg(
             kwargs.pop("string_index_type", None),
@@ -342,6 +352,7 @@ def recognize_pii_entities(  # type: ignore
                 model_version=model_version,
                 show_stats=show_stats,
                 domain=domain_filter,
+                pii_categories=categories_filter,
                 cls=kwargs.pop("cls", pii_entities_result),
                 **kwargs
             )