Skip to content

Commit

Permalink
[Search] Add skillset validation (#20669)
Browse files Browse the repository at this point in the history
* Add client-side validation and test.

* Add skillset validation test.

* Make code more Pythonic.
  • Loading branch information
tjprescott authored Sep 15, 2021
1 parent 6a06b0e commit 1efff7f
Show file tree
Hide file tree
Showing 13 changed files with 233 additions and 208 deletions.
1 change: 1 addition & 0 deletions sdk/search/azure-search-documents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

- Renamed `SearchClient.speller` to `SearchClient.query_speller`.
- Removed keyword arguments from `SearchClient`: `answers` and `captions`.
- `SentimentSkill`, `EntityRecognitionSkill`: added client-side validation to prevent sending unsupported parameters.

### Bugs Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
get_access_conditions,
normalize_endpoint,
)
from .models import SearchIndexerDataSourceConnection
from .models import (
EntityRecognitionSkillVersion,
SearchIndexerDataSourceConnection,
SentimentSkillVersion
)
from .._api_versions import DEFAULT_VERSION
from .._headers_mixin import HeadersMixin
from .._utils import get_authentication_policy
Expand Down Expand Up @@ -564,7 +568,9 @@ def create_skillset(self, skillset, **kwargs):
"""
kwargs["headers"] = self._merge_client_headers(kwargs.get("headers"))
_validate_skillset(skillset)
skillset = skillset._to_generated() if hasattr(skillset, '_to_generated') else skillset # pylint:disable=protected-access

result = self._client.skillsets.create(skillset, **kwargs)
return SearchIndexerSkillset._from_generated(result) # pylint:disable=protected-access

Expand All @@ -587,6 +593,7 @@ def create_or_update_skillset(self, skillset, **kwargs):
skillset, kwargs.pop("match_condition", MatchConditions.Unconditionally)
)
kwargs.update(access_condition)
_validate_skillset(skillset)
skillset = skillset._to_generated() if hasattr(skillset, '_to_generated') else skillset # pylint:disable=protected-access

result = self._client.skillsets.create_or_update(
Expand All @@ -596,3 +603,44 @@ def create_or_update_skillset(self, skillset, **kwargs):
**kwargs
)
return SearchIndexerSkillset._from_generated(result) # pylint:disable=protected-access

def _validate_skillset(skillset):
    """Validate the multi-version skills of a skillset on the client side.

    Every skill that carries a ``skill_version`` is checked against the
    parameters that the selected version does not support. Skills may be
    mapping-like (read through ``.get``) or plain objects (read through
    ``getattr``).

    :param skillset: The skillset to check. A value without a ``skills``
        attribute, or with no skills, is accepted without further checks.
    :raises ValueError: If at least one skill sets a parameter that its skill
        version does not support. The message contains one line per
        offending skill.
    """
    skills = getattr(skillset, 'skills', None)
    if not skills:
        return

    def _read(skill, name):
        # Skills can be supplied either as dicts or as model objects.
        try:
            return skill.get(name)
        except AttributeError:
            return getattr(skill, name, None)

    error_strings = []
    for skill in skills:
        skill_version = _read(skill, 'skill_version')
        if not skill_version:
            # Not a multi-version skill: nothing to validate.
            continue

        if skill_version == SentimentSkillVersion.V1:
            unsupported = ('model_version', 'include_opinion_mining')
        elif skill_version == SentimentSkillVersion.V3:
            unsupported = ()
        elif skill_version == EntityRecognitionSkillVersion.V1:
            unsupported = ('model_version',)
        elif skill_version == EntityRecognitionSkillVersion.V3:
            unsupported = ('include_typeless_entities',)
        else:
            # Unrecognized version: skip validation. Without this fallback
            # ``unsupported`` would be unbound for the first skill, or would
            # silently reuse the list computed for the previous skill.
            unsupported = ()

        errors = [item for item in unsupported if _read(skill, item)]
        if errors:
            error_strings.append("Unsupported parameters for skill version {}: {}".format(
                skill_version, ", ".join(errors))
            )
    if error_strings:
        raise ValueError("\n".join(error_strings))
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class EntityRecognitionSkill(SearchIndexerSkill):
'include_typeless_entities': {'key': 'includeTypelessEntities', 'type': 'bool'},
'minimum_precision': {'key': 'minimumPrecision', 'type': 'float'},
'model_version': {'key': 'modelVersion', 'type': 'str'},
'skill_version': {'key': 'skillVersion', 'type': 'str'}
}

def __init__(
Expand Down Expand Up @@ -210,8 +211,7 @@ def _to_generated(self):
categories=self.categories,
default_language_code=self.default_language_code,
include_typeless_entities=self.include_typeless_entities,
minimum_precision=self.minimum_precision,
model_version=self.model_version
minimum_precision=self.minimum_precision
)
if self.skill_version in [EntityRecognitionSkillVersion.V3, EntityRecognitionSkillVersion.LATEST]:
return _EntityRecognitionSkillV3(
Expand All @@ -221,7 +221,6 @@ def _to_generated(self):
odata_type=self.odata_type,
categories=self.categories,
default_language_code=self.default_language_code,
include_typeless_entities=self.include_typeless_entities,
minimum_precision=self.minimum_precision,
model_version=self.model_version
)
Expand Down Expand Up @@ -318,6 +317,7 @@ class SentimentSkill(SearchIndexerSkill):
'default_language_code': {'key': 'defaultLanguageCode', 'type': 'str'},
'include_opinion_mining': {'key': 'includeOpinionMining', 'type': 'bool'},
'model_version': {'key': 'modelVersion', 'type': 'str'},
'skill_version': {'key': 'skillVersion', 'type': 'str'}
}

def __init__(
Expand All @@ -331,7 +331,7 @@ def __init__(
self.skill_version = skill_version
self.odata_type = self.skill_version # type: str
self.default_language_code = kwargs.get('default_language_code', None)
self.include_opinion_mining = kwargs.get('include_opinion_mining', False)
self.include_opinion_mining = kwargs.get('include_opinion_mining', None)
self.model_version = kwargs.get('model_version', None)

def _to_generated(self):
Expand All @@ -341,10 +341,8 @@ def _to_generated(self):
outputs=self.outputs,
name=self.name,
odata_type=self.odata_type,
default_language_code=self.default_language_code,
include_opinion_mining=self.include_opinion_mining,
model_version=self.model_version
)
default_language_code=self.default_language_code
)
if self.skill_version in [SentimentSkillVersion.V3, SentimentSkillVersion.LATEST]:
return _SentimentSkillV3(
inputs=self.inputs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ interactions:
Prefer:
- return=representation
User-Agent:
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
method: PUT
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
response:
body:
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871C89346\"","name":"test-ss","description":"desc1","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF242A141A\"","name":"test-ss","description":"desc1","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
headers:
cache-control:
- no-cache
Expand All @@ -32,11 +32,11 @@ interactions:
content-type:
- application/json; odata.metadata=minimal
date:
- Thu, 02 Sep 2021 23:21:51 GMT
- Tue, 14 Sep 2021 20:35:07 GMT
elapsed-time:
- '1433'
- '2124'
etag:
- W/"0x8D96E6871C89346"
- W/"0x8D977BF242A141A"
expires:
- '-1'
location:
Expand All @@ -48,7 +48,7 @@ interactions:
preference-applied:
- odata.include-annotations="*"
request-id:
- 8ded272a-0c44-11ec-ac43-74c63bed1137
- 3ff59952-159b-11ec-bda5-74c63bed1137
strict-transport-security:
- max-age=15724800; includeSubDomains
status:
Expand All @@ -73,12 +73,12 @@ interactions:
Prefer:
- return=representation
User-Agent:
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
method: PUT
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
response:
body:
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
headers:
cache-control:
- no-cache
Expand All @@ -87,11 +87,11 @@ interactions:
content-type:
- application/json; odata.metadata=minimal
date:
- Thu, 02 Sep 2021 23:21:51 GMT
- Tue, 14 Sep 2021 20:35:07 GMT
elapsed-time:
- '46'
- '85'
etag:
- W/"0x8D96E6871DB3571"
- W/"0x8D977BF243E8A49"
expires:
- '-1'
odata-version:
Expand All @@ -101,7 +101,7 @@ interactions:
preference-applied:
- odata.include-annotations="*"
request-id:
- 8ef2919c-0c44-11ec-a88c-74c63bed1137
- 41558274-159b-11ec-bc0f-74c63bed1137
strict-transport-security:
- max-age=15724800; includeSubDomains
vary:
Expand All @@ -119,12 +119,12 @@ interactions:
Connection:
- keep-alive
User-Agent:
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
method: GET
uri: https://searche2bf1c71.search.windows.net/skillsets?api-version=2021-04-30-Preview
response:
body:
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets","value":[{"@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}]}'
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets","value":[{"@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}]}'
headers:
cache-control:
- no-cache
Expand All @@ -133,9 +133,9 @@ interactions:
content-type:
- application/json; odata.metadata=minimal
date:
- Thu, 02 Sep 2021 23:21:51 GMT
- Tue, 14 Sep 2021 20:35:07 GMT
elapsed-time:
- '139'
- '69'
expires:
- '-1'
odata-version:
Expand All @@ -145,7 +145,7 @@ interactions:
preference-applied:
- odata.include-annotations="*"
request-id:
- 8f03c3cb-0c44-11ec-94e8-74c63bed1137
- 4169c877-159b-11ec-9f5a-74c63bed1137
strict-transport-security:
- max-age=15724800; includeSubDomains
vary:
Expand All @@ -163,12 +163,12 @@ interactions:
Connection:
- keep-alive
User-Agent:
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
method: GET
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
response:
body:
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
headers:
cache-control:
- no-cache
Expand All @@ -177,11 +177,11 @@ interactions:
content-type:
- application/json; odata.metadata=minimal
date:
- Thu, 02 Sep 2021 23:21:51 GMT
- Tue, 14 Sep 2021 20:35:07 GMT
elapsed-time:
- '13'
- '15'
etag:
- W/"0x8D96E6871DB3571"
- W/"0x8D977BF243E8A49"
expires:
- '-1'
odata-version:
Expand All @@ -191,7 +191,7 @@ interactions:
preference-applied:
- odata.include-annotations="*"
request-id:
- 8f22fc82-0c44-11ec-8c37-74c63bed1137
- 417b5016-159b-11ec-acf0-74c63bed1137
strict-transport-security:
- max-age=15724800; includeSubDomains
vary:
Expand Down
Loading

0 comments on commit 1efff7f

Please sign in to comment.