Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] PII updates for v5.1.0b6 #17038

Merged
merged 8 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

**New Features**

- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added `categories_filter` property to class `RecognizePiiEntitiesAction`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v tiny nit: categories_filter

- Added enum `PiiEntityCategoryType`.
- Added property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
exists on the `HealthcareEntity`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@
HealthcareEntityRelationRoleType,
)
from ._paging import AnalyzeHealthcareEntitiesResult
from ._generated.v3_1_preview_4.models import RelationType as HealthcareEntityRelationType
from ._generated.v3_1_preview_4.models import (
PiiCategory as PiiEntityCategoryType,
RelationType as HealthcareEntityRelationType
)

__all__ = [
'TextAnalyticsApiVersion',
Expand Down Expand Up @@ -85,6 +88,7 @@
'RequestStatistics',
'AnalyzeBatchActionsType',
"AnalyzeBatchActionsError",
"PiiEntityCategoryType",
"HealthcareEntityRelationType",
"HealthcareEntityRelationRoleType",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
"""Submit healthcare analysis job.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ async def _health_initial(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> Optional["_models.HealthcareJobState"]:
cls = kwargs.pop('cls', None) # type: ClsType[Optional["_models.HealthcareJobState"]]
Expand Down Expand Up @@ -474,7 +474,7 @@ async def begin_health(
self,
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
"""Submit healthcare analysis job.
Expand Down Expand Up @@ -550,7 +550,7 @@ async def entities_recognition_general(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntitiesResult":
"""Named Entity Recognition.
Expand Down Expand Up @@ -637,7 +637,7 @@ async def entities_recognition_pii(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
domain: Optional[str] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
pii_categories: Optional[List[Union[str, "_models.PiiCategory"]]] = None,
**kwargs
) -> "_models.PiiResult":
Expand Down Expand Up @@ -734,7 +734,7 @@ async def entities_linking(
documents: List["_models.MultiLanguageInput"],
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.EntityLinkingResult":
"""Linked entities from a well known knowledge base.
Expand Down Expand Up @@ -977,7 +977,7 @@ async def sentiment(
model_version: Optional[str] = None,
show_stats: Optional[bool] = None,
opinion_mining: Optional[bool] = None,
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
**kwargs
) -> "_models.SentimentResponse":
"""Sentiment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@
SentenceSentimentValue,
State,
StringIndexType,
StringIndexTypeResponse,
TargetRelationType,
TokenSentimentValue,
WarningCodeValue,
Expand Down Expand Up @@ -248,7 +247,6 @@
'SentenceSentimentValue',
'State',
'StringIndexType',
'StringIndexTypeResponse',
'TargetRelationType',
'TokenSentimentValue',
'WarningCodeValue',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,9 +801,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -817,7 +816,7 @@ def __init__(
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class Entity(msrest.serialization.Model):
Expand Down Expand Up @@ -936,9 +935,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -952,7 +950,7 @@ def __init__(
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
self.model_version = kwargs.get('model_version', "latest")
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class ErrorResponse(msrest.serialization.Model):
Expand Down Expand Up @@ -1792,9 +1790,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -1804,7 +1801,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -1816,7 +1813,7 @@ def __init__(
self.domain = kwargs.get('domain', "none")
self.model_version = kwargs.get('model_version', "latest")
self.pii_categories = kwargs.get('pii_categories', None)
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
self.string_index_type = kwargs.get('string_index_type', None)


class RequestStatistics(msrest.serialization.Model):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,9 +900,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -914,7 +913,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntitiesTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -1052,9 +1051,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
:param model_version:
:type model_version: str
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_attribute_map = {
Expand All @@ -1066,7 +1064,7 @@ def __init__(
self,
*,
model_version: Optional[str] = "latest",
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
Expand Down Expand Up @@ -2013,9 +2011,8 @@ class PiiTaskParameters(msrest.serialization.Model):
:param pii_categories: (Optional) describes the PII categories to return.
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
"Utf16CodeUnit". Default value: "TextElements_v8".
:type string_index_type: str or
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
"Utf16CodeUnit".
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
"""

_validation = {
Expand All @@ -2025,7 +2022,7 @@ class PiiTaskParameters(msrest.serialization.Model):
_attribute_map = {
'domain': {'key': 'domain', 'type': 'str'},
'model_version': {'key': 'model-version', 'type': 'str'},
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
}

Expand All @@ -2035,7 +2032,7 @@ def __init__(
domain: Optional[Union[str, "PiiTaskParametersDomain"]] = "none",
model_version: Optional[str] = "latest",
pii_categories: Optional[List[Union[str, "PiiCategory"]]] = None,
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
**kwargs
):
super(PiiTaskParameters, self).__init__(**kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,6 @@ class StringIndexType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
#: application is written in a language that support Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

class StringIndexTypeResponse(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):

#: Returned offset and length values will correspond to TextElements (Graphemes and Grapheme
#: clusters) confirming to the Unicode 8.0.0 standard. Use this option if your application is
#: written in .Net Framework or .Net Core and you will be using StringInfo.
TEXT_ELEMENTS_V8 = "TextElements_v8"
#: Returned offset and length values will correspond to Unicode code points. Use this option if
#: your application is written in a language that support Unicode, for example Python.
UNICODE_CODE_POINT = "UnicodeCodePoint"
#: Returned offset and length values will correspond to UTF-16 code units. Use this option if your
#: application is written in a language that support Unicode, for example Java, JavaScript.
UTF16_CODE_UNIT = "Utf16CodeUnit"

class TargetRelationType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The type related to the target.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def _health_initial(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> Optional["_models.HealthcareJobState"]
Expand Down Expand Up @@ -485,7 +485,7 @@ def begin_health(
self,
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> AnalyzeHealthcareEntitiesLROPoller["_models.HealthcareJobState"]
Expand Down Expand Up @@ -562,7 +562,7 @@ def entities_recognition_general(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntitiesResult"
Expand Down Expand Up @@ -650,7 +650,7 @@ def entities_recognition_pii(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
domain=None, # type: Optional[str]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
pii_categories=None, # type: Optional[List[Union[str, "_models.PiiCategory"]]]
**kwargs # type: Any
):
Expand Down Expand Up @@ -748,7 +748,7 @@ def entities_linking(
documents, # type: List["_models.MultiLanguageInput"]
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.EntityLinkingResult"
Expand Down Expand Up @@ -994,7 +994,7 @@ def sentiment(
model_version=None, # type: Optional[str]
show_stats=None, # type: Optional[bool]
opinion_mining=None, # type: Optional[bool]
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
**kwargs # type: Any
):
# type: (...) -> "_models.SentimentResponse"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1410,13 +1410,21 @@ class RecognizePiiEntitiesAction(DictMixin):
:keyword str model_version: The model version to use for the analysis.
:keyword str domain_filter: An optional string to set the PII domain to include only a
subset of the PII entity categories. Possible values include 'phi' or None.
:keyword categories_filter: A list of specific PII entity categories to return. If the value of `domain_filter`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idk if it's worth to mention domain_filter in this docstring. Following UX studies I've been trying to put more examples in the docstrings, so the docstring could be something like

:keyword categories_filter: Instead of filtering over all PII entity categories, you can pass in a list of the specific PII entity categories you want to filter out. For example, if you only want to filter out U.S. social security numbers in a document, you can pass in `[PiiEntityCategoryType.US_SOCIAL_SECURITY_NUMBER]` for this kwarg.

is also set, the result will contain entities in the intersection of `domain_filter` and the items in
`categories_filter`.
:paramtype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
:keyword str string_index_type: Specifies the method used to interpret string offsets.
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
see https://aka.ms/text-analytics-offsets
:ivar str model_version: The model version to use for the analysis.
:ivar str domain_filter: An optional string to set the PII domain to include only a
subset of the PII entity categories. Possible values include 'phi' or None.
:ivar categories_filter: A list of specific PII entity categories to return. If the value of `domain_filter`
is also set, the result will contain entities in the intersection of `domain_filter` and the list
`categories_filter`.
:vartype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
:ivar str string_index_type: Specifies the method used to interpret string offsets.
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
Expand All @@ -1427,17 +1435,24 @@ def __init__(self, **kwargs):
self.model_version = kwargs.get("model_version", "latest")
self.domain_filter = kwargs.get("domain_filter", None)
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
self.categories_filter = kwargs.get("categories_filter", None)

def __repr__(self, **kwargs):
return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})" \
.format(self.model_version, self.domain_filter, self.string_index_type)[:1024]
return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={}, " \
"categories_filter={})".format(
self.model_version,
self.domain_filter,
self.string_index_type,
self.categories_filter
)[:1024]

def to_generated(self):
return _latest_preview_models.PiiTask(
parameters=_latest_preview_models.PiiTaskParameters(
model_version=self.model_version,
domain=self.domain_filter,
string_index_type=self.string_index_type
string_index_type=self.string_index_type,
pii_entity_categories=self.categories_filter
abhahn marked this conversation as resolved.
Show resolved Hide resolved
)
)

Expand Down
Loading