Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] PII updates for v5.1.0b6 #17038

Merged
merged 8 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

**New Features**

- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added enum `PiiEntityCategoryType`.
- Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
exists on the `HealthcareEntity`.
- Add property `assertion` onto `HealthcareEntity`. This contains assertions about the entity itself, i.e. if the entity represents a diagnosis,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
)
from ._paging import AnalyzeHealthcareEntitiesResult
from ._generated.v3_1_preview_4.models import (
RelationType as HealthcareEntityRelationType,
PiiCategory as PiiEntityCategoryType,
RelationType as HealthcareEntityRelationType
)

__all__ = [
Expand Down Expand Up @@ -87,6 +88,7 @@
'RequestStatistics',
'AnalyzeBatchActionsType',
"AnalyzeBatchActionsError",
"PiiEntityCategoryType",
"HealthcareEntityRelationType",
"HealthcareEntityRelationRoleType",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
"""Submit healthcare analysis job.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ async def _health_initial(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> Optional["_models.HealthcareJobState"]:
cls = kwargs.pop('cls', None) # type: ClsType[Optional["_models.HealthcareJobState"]]
Expand Down Expand Up @@ -474,7 +474,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down Expand Up @@ -550,7 +550,7 @@ async def entities_recognition_general(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntitiesResult":
"""Named Entity Recognition.
Expand Down Expand Up @@ -637,7 +637,7 @@ async def entities_recognition_pii(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
domain: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
pii_categories: Optional[List[Union[str, "_models.PiiCategory"]]] = None,
**kwargs
) -> "_models.PiiResult":
Expand Down Expand Up @@ -734,7 +734,7 @@ async def entities_linking(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntityLinkingResult":
"""Linked entities from a well known knowledge base.
Expand Down Expand Up @@ -977,7 +977,7 @@ async def sentiment(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
opinion_mining: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.SentimentResponse":
"""Sentiment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@
SentenceSentimentValue,
State,
StringIndexType,
StringIndexTypeResponse,
TargetRelationType,
TokenSentimentValue,
WarningCodeValue,
Expand Down Expand Up @@ -248,7 +247,6 @@
'SentenceSentimentValue',
'State',
'StringIndexType',
'StringIndexTypeResponse',
'TargetRelationType',
'TokenSentimentValue',
'WarningCodeValue',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,9 +801,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -817,7 +816,7 @@ def __init__(
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class Entity(msrest.serialization.Model):
Expand Down Expand Up @@ -936,9 +935,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -952,7 +950,7 @@ def __init__(
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class ErrorResponse(msrest.serialization.Model):
Expand Down Expand Up @@ -1792,9 +1790,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -1804,7 +1801,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -1816,7 +1813,7 @@ def __init__(
self.domain = kwargs.get('domain', "none")
self.model_version = kwargs.get('model_version', "latest")
self.pii_categories = kwargs.get('pii_categories', None)
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class RequestStatistics(msrest.serialization.Model):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,9 +900,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -914,7 +913,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -1052,9 +1051,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -1066,7 +1064,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -2013,9 +2011,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -2025,7 +2022,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -2035,7 +2032,7 @@ def __init__(
domain: Optional[Union[str, "PiiTaskParametersDomain"]] = "none",
model_version: Optional[str] = "latest",
pii_categories: Optional[List[Union[str, "PiiCategory"]]] = None,
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(PiiTaskParameters, self).__init__(**kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,6 @@ class StringIndexType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
#: application is written in a language that support Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

class StringIndexTypeResponse(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
# String-valued enum naming the unit in which returned offset and length values
# are expressed (text elements, Unicode code points, or UTF-16 code units).

#: Returned offset and length values will correspond to TextElements (Graphemes and Grapheme
#: clusters) conforming to the Unicode 8.0.0 standard. Use this option if your application is
#: written in .Net Framework or .Net Core and you will be using StringInfo.
TEXT_ELEMENTS_V8 = "TextElements_v8"
#: Returned offset and length values will correspond to Unicode code points. Use this option if
#: your application is written in a language that supports Unicode, for example Python.
UNICODE_CODE_POINT = "UnicodeCodePoint"
#: Returned offset and length values will correspond to UTF-16 code units. Use this option if your
#: application is written in a language that supports Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

class TargetRelationType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The type related to the target.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def _health_initial(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> Optional["_models.HealthcareJobState"]
Expand Down Expand Up @@ -485,7 +485,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> AnalyzeHealthcareEntitiesLROPoller["_models.HealthcareJobState"]
Expand Down Expand Up @@ -562,7 +562,7 @@ def entities_recognition_general(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntitiesResult"
Expand Down Expand Up @@ -650,7 +650,7 @@ def entities_recognition_pii(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
domain=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
pii_categories=None, # type: Optional[List[Union[str, "_models.PiiCategory"]]]
**kwargs # type: Any
):
Expand Down Expand Up @@ -748,7 +748,7 @@ def entities_linking(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntityLinkingResult"
Expand Down Expand Up @@ -994,7 +994,7 @@ def sentiment(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
opinion_mining=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.SentimentResponse"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1485,8 +1485,11 @@ def __init__(self, **kwargs):
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")

def __repr__(self, **kwargs):
    """Return a debug representation of this action.

    Renders ``model_version``, ``domain_filter`` and ``string_index_type``
    in constructor-call form and truncates the result to at most 1024
    characters.

    :return: The (possibly truncated) representation string.
    :rtype: str
    """
    # The closing parenthesis is part of the template so the rendered text is
    # balanced: ``RecognizePiiEntitiesAction(..., string_index_type=X)``.
    return (
        "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})".format(
            self.model_version,
            self.domain_filter,
            self.string_index_type,
        )[:1024]
    )

def to_generated(self):
return _latest_preview_models.PiiTask(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import HttpResponseError
from ._base_client import TextAnalyticsClientBase
from ._request_handlers import _validate_input, _determine_action_type, _check_string_index_type_arg
from ._request_handlers import (
_validate_input,
_determine_action_type,
_check_string_index_type_arg
)
from ._response_handlers import (
process_http_response_error,
entities_result,
Expand Down Expand Up @@ -301,6 +305,11 @@ def recognize_pii_entities( # type: ignore
I.e., if set to 'phi', will only return entities in the Protected Healthcare Information domain.
See https://aka.ms/tanerpii for more information.
:paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
:keyword categories_filter: Instead of returning entities from all PII entity categories, you can pass in a list of
the specific PII entity categories you want returned. For example, if you only want
U.S. social security numbers recognized in a document, you can pass in
`[PiiEntityCategoryType.US_SOCIAL_SECURITY_NUMBER]` for this kwarg.
:paramtype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
:keyword str string_index_type: Specifies the method used to interpret string offsets.
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
Expand All @@ -327,6 +336,7 @@ def recognize_pii_entities( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
domain_filter = kwargs.pop("domain_filter", None)
categories_filter = kwargs.pop("categories_filter", None)

string_index_type = _check_string_index_type_arg(
kwargs.pop("string_index_type", None),
Expand All @@ -342,6 +352,7 @@ def recognize_pii_entities( # type: ignore
model_version=model_version,
show_stats=show_stats,
domain=domain_filter,
pii_categories=categories_filter,
cls=kwargs.pop("cls", pii_entities_result),
**kwargs
)
Expand Down
Loading