Skip to content

Commit

Permalink
[text analytics] PII updates for v5.1.0b6 (#17038)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhahn authored Mar 5, 2021
1 parent 8265479 commit b6d2979
Show file tree
Hide file tree
Showing 20 changed files with 370 additions and 71 deletions.
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

**New Features**

- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added enum `PiiEntityCategoryType`.
- Added property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
exists on `HealthcareEntity`.
- Add property `assertion` onto `HealthcareEntity`. This contains assertions about the entity itself, i.e. if the entity represents a diagnosis,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
)
from ._paging import AnalyzeHealthcareEntitiesResult
from ._generated.v3_1_preview_4.models import (
RelationType as HealthcareEntityRelationType,
PiiCategory as PiiEntityCategoryType,
RelationType as HealthcareEntityRelationType
)

__all__ = [
Expand Down Expand Up @@ -87,6 +88,7 @@
'RequestStatistics',
'AnalyzeBatchActionsType',
"AnalyzeBatchActionsError",
"PiiEntityCategoryType",
"HealthcareEntityRelationType",
"HealthcareEntityRelationRoleType",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
"""Submit healthcare analysis job.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ async def _health_initial(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> Optional["_models.HealthcareJobState"]:
cls = kwargs.pop('cls', None) # type: ClsType[Optional["_models.HealthcareJobState"]]
Expand Down Expand Up @@ -474,7 +474,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down Expand Up @@ -550,7 +550,7 @@ async def entities_recognition_general(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntitiesResult":
"""Named Entity Recognition.
Expand Down Expand Up @@ -637,7 +637,7 @@ async def entities_recognition_pii(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
domain: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
pii_categories: Optional[List[Union[str, "_models.PiiCategory"]]] = None,
**kwargs
) -> "_models.PiiResult":
Expand Down Expand Up @@ -734,7 +734,7 @@ async def entities_linking(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntityLinkingResult":
"""Linked entities from a well known knowledge base.
Expand Down Expand Up @@ -977,7 +977,7 @@ async def sentiment(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
opinion_mining: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.SentimentResponse":
"""Sentiment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@
SentenceSentimentValue,
State,
StringIndexType,
StringIndexTypeResponse,
TargetRelationType,
TokenSentimentValue,
WarningCodeValue,
Expand Down Expand Up @@ -248,7 +247,6 @@
'SentenceSentimentValue',
'State',
'StringIndexType',
'StringIndexTypeResponse',
'TargetRelationType',
'TokenSentimentValue',
'WarningCodeValue',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,9 +801,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -817,7 +816,7 @@ def __init__(
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class Entity(msrest.serialization.Model):
Expand Down Expand Up @@ -936,9 +935,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -952,7 +950,7 @@ def __init__(
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class ErrorResponse(msrest.serialization.Model):
Expand Down Expand Up @@ -1792,9 +1790,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -1804,7 +1801,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -1816,7 +1813,7 @@ def __init__(
self.domain = kwargs.get('domain', "none")
self.model_version = kwargs.get('model_version', "latest")
self.pii_categories = kwargs.get('pii_categories', None)
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class RequestStatistics(msrest.serialization.Model):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,9 +900,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -914,7 +913,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -1052,9 +1051,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -1066,7 +1064,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -2013,9 +2011,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -2025,7 +2022,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -2035,7 +2032,7 @@ def __init__(
domain: Optional[Union[str, "PiiTaskParametersDomain"]] = "none",
model_version: Optional[str] = "latest",
pii_categories: Optional[List[Union[str, "PiiCategory"]]] = None,
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(PiiTaskParameters, self).__init__(**kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,6 @@ class StringIndexType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
#: application is written in a language that supports Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

# Enum of string index types; the comment above each member states what the returned
# offset and length values correspond to when that member is selected.
class StringIndexTypeResponse(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):

#: Returned offset and length values will correspond to TextElements (Graphemes and Grapheme
#: clusters) conforming to the Unicode 8.0.0 standard. Use this option if your application is
#: written in .Net Framework or .Net Core and you will be using StringInfo.
TEXT_ELEMENTS_V8 = "TextElements_v8"
#: Returned offset and length values will correspond to Unicode code points. Use this option if
#: your application is written in a language that supports Unicode, for example Python.
UNICODE_CODE_POINT = "UnicodeCodePoint"
#: Returned offset and length values will correspond to UTF-16 code units. Use this option if your
#: application is written in a language that supports Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

class TargetRelationType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The type related to the target.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def _health_initial(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> Optional["_models.HealthcareJobState"]
Expand Down Expand Up @@ -485,7 +485,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> AnalyzeHealthcareEntitiesLROPoller["_models.HealthcareJobState"]
Expand Down Expand Up @@ -562,7 +562,7 @@ def entities_recognition_general(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntitiesResult"
Expand Down Expand Up @@ -650,7 +650,7 @@ def entities_recognition_pii(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
domain=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
pii_categories=None, # type: Optional[List[Union[str, "_models.PiiCategory"]]]
**kwargs # type: Any
):
Expand Down Expand Up @@ -748,7 +748,7 @@ def entities_linking(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntityLinkingResult"
Expand Down Expand Up @@ -994,7 +994,7 @@ def sentiment(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
opinion_mining=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.SentimentResponse"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1485,8 +1485,11 @@ def __init__(self, **kwargs):
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")

def __repr__(self, **kwargs):
    """Return a debug representation of this action, capped at 1024 characters.

    The text lists the ``model_version``, ``domain_filter`` and
    ``string_index_type`` attributes read from the instance.

    :return: The representation string, truncated to at most 1024 characters.
    :rtype: str
    """
    # The closing parenthesis must live inside the format string; without it
    # the representation is unbalanced ("RecognizePiiEntitiesAction(...").
    return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})".format(
        self.model_version,
        self.domain_filter,
        self.string_index_type,
    )[:1024]

def to_generated(self):
return _latest_preview_models.PiiTask(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import HttpResponseError
from ._base_client import TextAnalyticsClientBase
from ._request_handlers import _validate_input, _determine_action_type, _check_string_index_type_arg
from ._request_handlers import (
_validate_input,
_determine_action_type,
_check_string_index_type_arg
)
from ._response_handlers import (
process_http_response_error,
entities_result,
Expand Down Expand Up @@ -301,6 +305,11 @@ def recognize_pii_entities( # type: ignore
I.e., if set to 'phi', will only return entities in the Protected Healthcare Information domain.
See https://aka.ms/tanerpii for more information.
:paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
:keyword categories_filter: Instead of filtering over all PII entity categories, you can pass in a list of
the specific PII entity categories you want to filter out. For example, if you only want to filter out
U.S. social security numbers in a document, you can pass in
`[PiiEntityCategoryType.US_SOCIAL_SECURITY_NUMBER]` for this kwarg.
:paramtype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
:keyword str string_index_type: Specifies the method used to interpret string offsets.
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
Expand All @@ -327,6 +336,7 @@ def recognize_pii_entities( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
domain_filter = kwargs.pop("domain_filter", None)
categories_filter = kwargs.pop("categories_filter", None)

string_index_type = _check_string_index_type_arg(
kwargs.pop("string_index_type", None),
Expand All @@ -342,6 +352,7 @@ def recognize_pii_entities( # type: ignore
model_version=model_version,
show_stats=show_stats,
domain=domain_filter,
pii_categories=categories_filter,
cls=kwargs.pop("cls", pii_entities_result),
**kwargs
)
Expand Down
Loading

0 comments on commit b6d2979

Please sign in to comment.