-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[text analytics] opinion mining support #12542
Changes from all commits
7755968
43e72dc
a3b4510
32ddecc
ea9fca3
590dd11
399571a
1ef46f9
858e70d
e83b145
ffe563d
09cf3fd
7ee99e0
e9fee50
77c7fa8
87b4a80
83426aa
7dc065c
273fb94
4180ad0
5b804d2
f03b477
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,8 +26,11 @@ | |
TextDocumentBatchStatistics, | ||
SentenceSentiment, | ||
SentimentConfidenceScores, | ||
MinedOpinion, | ||
AspectSentiment, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be better to call this SentimentAspect and SentimentOpinion as this represents the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have DocumentSentiment, SentenceSentiment, so if we follow the pattern. It would be AspectSentiment and OpinionSentiment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah after talking to @annelo-msft , she brought up a really good point where these all have the same basic structure (have |
||
OpinionSentiment, | ||
RecognizePiiEntitiesResult, | ||
PiiEntity | ||
PiiEntity, | ||
) | ||
|
||
__all__ = [ | ||
|
@@ -51,6 +54,9 @@ | |
'TextDocumentBatchStatistics', | ||
'SentenceSentiment', | ||
'SentimentConfidenceScores', | ||
'MinedOpinion', | ||
'AspectSentiment', | ||
'OpinionSentiment', | ||
'RecognizePiiEntitiesResult', | ||
'PiiEntity', | ||
] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,14 @@ | |
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
# ------------------------------------ | ||
import re | ||
from ._generated.v3_0.models._models import ( | ||
LanguageInput, | ||
MultiLanguageInput | ||
) | ||
|
||
from ._generated.v3_0.models._models import LanguageInput | ||
from ._generated.v3_0.models._models import MultiLanguageInput | ||
|
||
def _get_indices(relation): | ||
return [int(s) for s in re.findall(r"\d+", relation)] | ||
|
||
class DictMixin(object): | ||
|
||
|
@@ -702,19 +706,34 @@ class SentenceSentiment(DictMixin): | |
and 1 for the sentence for all labels. | ||
:vartype confidence_scores: | ||
~azure.ai.textanalytics.SentimentConfidenceScores | ||
:ivar mined_opinions: The list of opinions mined from this sentence. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be good to add an aka.ms link to the service documentation about Opinion Mining here? |
||
For example in "The food is good, but the service is bad", we would | ||
mine these two opinions "food is good", "service is bad". Only returned | ||
if `show_opinion_mining` is set to True in the call to `analyze_sentiment`. | ||
:vartype mined_opinions: | ||
list[~azure.ai.textanalytics.MinedOpinion] | ||
""" | ||
|
||
def __init__(self, **kwargs):
    """Populate the sentence attributes from keyword arguments.

    ``text``, ``sentiment``, ``confidence_scores`` and ``mined_opinions``
    are all optional; any that is not supplied is set to ``None``.
    """
    for name in ("text", "sentiment", "confidence_scores", "mined_opinions"):
        setattr(self, name, kwargs.get(name))
|
||
@classmethod
def _from_generated(cls, sentence, results):
    """Build an instance from a generated sentence model.

    ``mined_opinions`` is ``None`` when ``sentence`` has no ``aspects``
    attribute at all, an empty list when the attribute exists but is falsy,
    and otherwise one ``MinedOpinion`` per aspect. ``results`` is passed
    through unchanged to ``MinedOpinion._from_generated``.
    """
    if not hasattr(sentence, "aspects"):
        mined_opinions = None
    elif sentence.aspects:
        mined_opinions = [
            MinedOpinion._from_generated(aspect, results)  # pylint: disable=protected-access
            for aspect in sentence.aspects
        ]
    else:
        mined_opinions = []

    return cls(
        text=sentence.text,
        sentiment=sentence.sentiment,
        confidence_scores=SentimentConfidenceScores._from_generated(  # pylint: disable=protected-access
            sentence.confidence_scores
        ),
        mined_opinions=mined_opinions
    )
|
||
def __repr__(self): | ||
|
@@ -724,6 +743,150 @@ def __repr__(self): | |
repr(self.confidence_scores) | ||
)[:1024] | ||
|
||
class MinedOpinion(DictMixin):
    """An opinion extracted from a sentence.

    A mined opinion pairs the aspect being discussed with the opinions that
    were expressed about that aspect.

    :ivar aspect: The aspect of a product/service that this opinion is about
    :vartype aspect: ~azure.ai.textanalytics.AspectSentiment
    :ivar opinions: The actual opinions of the aspect
    :vartype opinions: list[~azure.ai.textanalytics.OpinionSentiment]
    """

    def __init__(self, **kwargs):
        # Both attributes are optional and fall back to None.
        for name in ("aspect", "opinions"):
            setattr(self, name, kwargs.get(name))

    @staticmethod
    def _get_opinions(relations, results):
        """Resolve an aspect's "opinion" relations to generated opinion objects.

        The ``ref`` of every relation whose ``relation_type`` is "opinion" is
        parsed with ``_get_indices``; the first three integers are used, in
        order, as the document, sentence and opinion indices into ``results``.
        Returns an empty list when ``relations`` is falsy.
        """
        if not relations:
            return []

        refs = [rel.ref for rel in relations if rel.relation_type == "opinion"]
        found = []
        for ref in refs:
            indices = _get_indices(ref)
            doc_idx, sent_idx, opinion_idx = indices[0], indices[1], indices[2]
            found.append(results[doc_idx].sentences[sent_idx].opinions[opinion_idx])
        return found

    @classmethod
    def _from_generated(cls, aspect, results):
        """Build an instance from a generated aspect and the full result list."""
        aspect_sentiment = AspectSentiment._from_generated(aspect)  # pylint: disable=protected-access
        opinion_sentiments = [
            OpinionSentiment._from_generated(generated)  # pylint: disable=protected-access
            for generated in cls._get_opinions(aspect.relations, results)
        ]
        return cls(aspect=aspect_sentiment, opinions=opinion_sentiments)

    def __repr__(self):
        # The representation is capped at 1024 characters.
        text = "MinedOpinion(aspect={}, opinions={})".format(
            repr(self.aspect),
            repr(self.opinions)
        )
        return text[:1024]
|
||
|
||
class AspectSentiment(DictMixin):
    """AspectSentiment contains the predicted sentiment, confidence scores
    and other information about an aspect of a product.
    An aspect of a product/service is a key component of that product/service.
    For example in "The food at Hotel Foo is good", "food" is an aspect of
    "Hotel Foo".

    :ivar str text: The aspect text.
    :ivar str sentiment: The predicted Sentiment for the aspect. Possible values
        include 'positive', 'mixed', and 'negative'.
    :ivar confidence_scores: The sentiment confidence score between 0
        and 1 for the aspect for 'positive' and 'negative' labels. Its score
        for 'neutral' will always be 0
    :vartype confidence_scores:
        ~azure.ai.textanalytics.SentimentConfidenceScores
    :ivar int offset: The aspect offset from the start of the sentence.
    :ivar int length: The length of the aspect.
    """

    def __init__(self, **kwargs):
        # Every attribute is optional and falls back to None.
        for name in ("text", "sentiment", "confidence_scores", "offset", "length"):
            setattr(self, name, kwargs.get(name))

    @classmethod
    def _from_generated(cls, aspect):
        """Build an instance by copying the fields of a generated aspect model."""
        return cls(
            text=aspect.text,
            sentiment=aspect.sentiment,
            confidence_scores=SentimentConfidenceScores._from_generated(  # pylint: disable=protected-access
                aspect.confidence_scores
            ),
            offset=aspect.offset,
            length=aspect.length
        )

    def __repr__(self):
        # The representation is capped at 1024 characters.
        text = "AspectSentiment(text={}, sentiment={}, confidence_scores={}, offset={}, length={})".format(
            self.text,
            self.sentiment,
            repr(self.confidence_scores),
            self.offset,
            self.length
        )
        return text[:1024]
|
||
|
||
class OpinionSentiment(DictMixin):
    """OpinionSentiment contains the predicted sentiment,
    confidence scores and other information about an opinion of an aspect.
    For example, in the sentence "The food is good", the opinion of the
    aspect 'food' is 'good'.

    :ivar str text: The opinion text.
    :ivar str sentiment: The predicted Sentiment for the opinion. Possible values
        include 'positive', 'mixed', and 'negative'.
    :ivar confidence_scores: The sentiment confidence score between 0
        and 1 for the opinion for 'positive' and 'negative' labels. Its score
        for 'neutral' will always be 0
    :vartype confidence_scores:
        ~azure.ai.textanalytics.SentimentConfidenceScores
    :ivar int offset: The opinion offset from the start of the sentence.
    :ivar int length: The length of the opinion.
    :ivar bool is_negated: Whether the opinion is negated. For example, in
        "The food is not good", the opinion "good" is negated.
    """

    def __init__(self, **kwargs):
        # Every attribute is optional and falls back to None.
        for name in ("text", "sentiment", "confidence_scores", "offset", "length", "is_negated"):
            setattr(self, name, kwargs.get(name))

    @classmethod
    def _from_generated(cls, opinion):
        """Build an instance by copying the fields of a generated opinion model."""
        return cls(
            text=opinion.text,
            sentiment=opinion.sentiment,
            confidence_scores=SentimentConfidenceScores._from_generated(  # pylint: disable=protected-access
                opinion.confidence_scores
            ),
            offset=opinion.offset,
            length=opinion.length,
            is_negated=opinion.is_negated
        )

    def __repr__(self):
        # The representation is capped at 1024 characters.
        text = (
            "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, offset={}, length={}, is_negated={})".format(
                self.text,
                self.sentiment,
                repr(self.confidence_scores),
                self.offset,
                self.length,
                self.is_negated
            )
        )
        return text[:1024]
|
||
|
||
class SentimentConfidenceScores(DictMixin): | ||
"""The confidence scores (Softmax scores) between 0 and 1. | ||
|
@@ -738,15 +901,15 @@ class SentimentConfidenceScores(DictMixin): | |
""" | ||
|
||
def __init__(self, **kwargs):
    """Store the three label scores; each defaults to 0.0 when not supplied."""
    for label in ('positive', 'neutral', 'negative'):
        setattr(self, label, kwargs.get(label, 0.0))
|
||
@classmethod
def _from_generated(cls, score):
    """Build an instance from a generated confidence-score model.

    ``positive`` and ``negative`` are read directly from ``score``.
    ``neutral`` is treated as optional: when ``score`` has no ``neutral``
    attribute it is reported as 0.0.

    :param score: The generated confidence-score object.
    :return: A new instance carrying the copied scores.
    """
    return cls(
        positive=score.positive,
        # Fall back to 0.0 only when the attribute is genuinely absent. The
        # earlier check tested the misspelled name "netural", which never
        # matched, so a real neutral score was always replaced by 0.0.
        neutral=getattr(score, "neutral", 0.0),
        negative=score.negative
    )
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# ------------------------------------ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
# ------------------------------------ | ||
from enum import Enum | ||
from typing import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from typing import Union | ||
|
||
|
||
class ApiVersion(str, Enum):
    """Text Analytics API versions supported by this package.

    Members are also ``str`` instances, so a member compares equal to its
    version string.
    """

    #: this is the default version
    V3_1_PREVIEW_1 = "v3.1-preview.1"
    V3_0 = "v3.0"
|
||
|
||
def load_generated_api(api_version, aio=False):
    """Return the generated ``TextAnalyticsClient`` class for an API version.

    :param api_version: An ``ApiVersion`` member or its string value.
    :param bool aio: When true, import the client from the version's ``aio``
        subpackage instead of the package itself.
    :return: The ``TextAnalyticsClient`` class imported for that version.
    :raises NotImplementedError: If ``api_version`` is not a value of
        ``ApiVersion``.
    """
    try:
        # Accepts either a plain string or an existing ApiVersion member;
        # the latter passes through unchanged.
        api_version = ApiVersion(api_version)
    except ValueError:
        # The value is not one of the versions this package knows about.
        supported = ", ".join(v.value for v in ApiVersion)
        raise NotImplementedError(
            "This package doesn't support API version '{}'. ".format(api_version)
            + "Supported versions: {}".format(supported)
        )

    # The import is deferred so only the requested generated package is loaded.
    if api_version == ApiVersion.V3_1_PREVIEW_1 and aio:
        from ._generated.v3_1_preview_1.aio import TextAnalyticsClient
    elif api_version == ApiVersion.V3_1_PREVIEW_1:
        from ._generated.v3_1_preview_1 import TextAnalyticsClient  # type: ignore
    elif api_version == ApiVersion.V3_0 and aio:
        from ._generated.v3_0.aio import TextAnalyticsClient  # type: ignore
    elif api_version == ApiVersion.V3_0:
        from ._generated.v3_0 import TextAnalyticsClient  # type: ignore
    return TextAnalyticsClient
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
😄
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
are you gloating
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no I'm smiling - like the emoji is