Skip to content

Commit

Permalink
#352 generalization-related refactoring. Considering calc_diff as a…
Browse files Browse the repository at this point in the history
… separate class of BaseComparisonCalculator type.
nicolay-r committed Jun 30, 2022
1 parent 57b6780 commit 17d5b31
Showing 6 changed files with 118 additions and 91 deletions.
Empty file.
4 changes: 4 additions & 0 deletions arekit/common/evaluation/calc/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class BaseComparisonCalculator(object):
    """ Common interface of calculators that compare etalon data with test data.
    """

    def calc_diff(self, etalon_opins, test_opins, is_label_supported):
        """ Abstract comparison entry point: this base version always raises
            NotImplementedError, so an implementation has to override it.

            NOTE(review): OpinionsComparisonCalculator declares these
            parameters as `etalon` / `test` -- confirm which keyword names
            callers are expected to use.
        """
        raise NotImplementedError
95 changes: 95 additions & 0 deletions arekit/common/evaluation/calc/opinions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from arekit.common.evaluation.calc.base import BaseComparisonCalculator
from arekit.common.evaluation.evaluators.modes import EvaluationModes
from arekit.common.evaluation.evaluators.utils import label_to_str, check_is_supported
from arekit.common.labels.base import Label
from arekit.common.opinions.base import Opinion
from arekit.common.opinions.collection import OpinionCollection


class OpinionsComparisonCalculator(BaseComparisonCalculator):
    """ Compares a pair of OpinionCollections (etalon vs. test) and reports
        the outcome as a list of rows, one row per compared opinion.
    """

    def __init__(self, eval_mode):
        assert isinstance(eval_mode, EvaluationModes)
        self.__eval_mode = eval_mode

    # region private methods

    @staticmethod
    def __cmp_result(l1, l2):
        """ Labels match only when both of them are present and equal;
            a missing (None) label on either side never counts as a match.
        """
        assert l1 is None or isinstance(l1, Label)
        assert l2 is None or isinstance(l2, Label)
        return l1 is not None and l2 is not None and l1 == l2

    @staticmethod
    def __label_str_or_none(label):
        """ String form of the label, or None when the label is absent.
        """
        return label_to_str(label) if label is not None else None

    def __iter_diff_core(self, etalon_opins, test_opins):
        """ Lazily yields [opinion, etalon_label, test_label] triplets.

            First pass walks over the etalon opinions, second pass walks over
            the test opinions which have no synonymous opinion in etalon.
            A label is None when the opinion is absent on the related side.
        """
        assert isinstance(etalon_opins, OpinionCollection)
        assert isinstance(test_opins, OpinionCollection)

        mode = self.__eval_mode

        # Pass 1: every etalon opinion against its counterpart in test.
        for etalon_opinion in etalon_opins:
            assert isinstance(etalon_opinion, Opinion)

            matched = test_opins.try_get_synonyms_opinion(etalon_opinion)

            if mode == EvaluationModes.Classification:
                # Etalon opinions without a counterpart in test
                # are left out of consideration in this mode.
                if matched is not None:
                    yield [etalon_opinion, etalon_opinion.Sentiment, matched.Sentiment]
            elif mode == EvaluationModes.Extraction:
                # Missing counterpart is reported with an empty test label.
                etalon_label = etalon_opinion.Sentiment
                test_label = matched.Sentiment if matched is not None else None
                yield [etalon_opinion, etalon_label, test_label]

        # Pass 2: test opinions that are absent in etalon.
        for test_opinion in test_opins:
            assert isinstance(test_opinion, Opinion)

            if etalon_opins.has_synonymous_opinion(test_opinion):
                # Already handled during the first pass.
                continue

            if mode == EvaluationModes.Classification:
                # Classification deals only with the opinions that were
                # provided beforehand, hence this case is treated as an error.
                raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
                                "found in etalon collection!".format(s=test_opinion.SourceValue,
                                                                     t=test_opinion.TargetValue))
            elif mode == EvaluationModes.Extraction:
                yield [test_opinion, None, test_opinion.Sentiment]

    # endregion

    def calc_diff(self, etalon, test, is_label_supported):
        """ Calculates the difference between a couple of OpinionCollections.

            etalon: OpinionCollection with the expected opinions.
            test: OpinionCollection with the opinions under evaluation.
            is_label_supported: callable, applied to every non-empty label.

            Returns a list of rows of the following form:
            [source value, target value, etalon label str (or None),
             test label str (or None), labels comparison result].
        """
        assert isinstance(etalon, OpinionCollection)
        assert isinstance(test, OpinionCollection)
        assert callable(is_label_supported)

        rows = []
        for opinion, etalon_label, test_label in self.__iter_diff_core(etalon_opins=etalon, test_opins=test):

            # Both labels have to be supported before the row is composed.
            check_is_supported(label=etalon_label, is_label_supported=is_label_supported)
            check_is_supported(label=test_label, is_label_supported=is_label_supported)

            rows.append([opinion.SourceValue,
                         opinion.TargetValue,
                         self.__label_str_or_none(etalon_label),
                         self.__label_str_or_none(test_label),
                         self.__cmp_result(l1=etalon_label, l2=test_label)])

        return rows
95 changes: 7 additions & 88 deletions arekit/common/evaluation/evaluators/base.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,17 @@
import collections

from arekit.common.evaluation.calc.opinions import OpinionsComparisonCalculator
from arekit.common.evaluation.cmp_opinions import OpinionCollectionsToCompare
from arekit.common.evaluation.evaluators.cmp_table import DocumentCompareTable
from arekit.common.evaluation.evaluators.modes import EvaluationModes
from arekit.common.evaluation.evaluators.utils import label_to_str
from arekit.common.evaluation.results.base import BaseEvalResult
from arekit.common.labels.base import Label
from arekit.common.opinions.base import Opinion
from arekit.common.opinions.collection import OpinionCollection


class BaseEvaluator(object):

def __init__(self, eval_mode):
assert(isinstance(eval_mode, EvaluationModes))
self.__eval_mode = eval_mode

# region private methods

@staticmethod
def __cmp_result(l1, l2):
assert(isinstance(l1, Label) or l1 is None)
assert(isinstance(l2, Label) or l2 is None)

if l1 is None or l2 is None:
return False

return l1 == l2

def __iter_diff_core(self, etalon_opins, test_opins):
assert(isinstance(etalon_opins, OpinionCollection))
assert(isinstance(test_opins, OpinionCollection))

for o_etalon in etalon_opins:
assert(isinstance(o_etalon, Opinion))

o_test = test_opins.try_get_synonyms_opinion(o_etalon)
has_opinion = o_test is not None

if self.__eval_mode == EvaluationModes.Classification:
# In case of evaluation mode, we do not consider such
# cases when etalon opinion was not found in result.
if not has_opinion:
continue
# Otherwise provide the information for further comparison.
yield [o_etalon, o_etalon.Sentiment, o_test.Sentiment]
elif self.__eval_mode == EvaluationModes.Extraction:
yield [o_etalon,
o_etalon.Sentiment,
None if not has_opinion else o_test.Sentiment]

for o_test in test_opins:
assert(isinstance(o_test, Opinion))
has_opinion = etalon_opins.has_synonymous_opinion(o_test)

if has_opinion:
# This case was covered by the prior loop.
continue

if self.__eval_mode == EvaluationModes.Classification:
# That could not be possible, since we perform
# classification of already provided opinions.
raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
"found in etalon collection!".format(s=o_test.SourceValue,
t=o_test.TargetValue))
elif self.__eval_mode == EvaluationModes.Extraction:
yield [o_test, None, o_test.Sentiment]

# endregion
self.__calc = OpinionsComparisonCalculator(eval_mode)

# region abstract methods

@@ -81,36 +25,11 @@ def _create_eval_result(self):

# region protected methods

def _check_is_supported(self, label, is_label_supported):
if label is None:
return True

if not is_label_supported(label):
raise Exception("Label \"{label}\" is not supported by {e}".format(
label=label_to_str(label),
e=type(self).__name__))

def _calc_diff(self, etalon_opins, test_opins, is_label_supported):
def _calc_diff(self, etalon_data, test_data, is_label_supported):
assert(callable(is_label_supported))

it = self.__iter_diff_core(etalon_opins=etalon_opins,
test_opins=test_opins)

# Cache all rows into `rows` array
rows = []
for args in it:
opin, etalon_label, result_label = args

self._check_is_supported(label=etalon_label, is_label_supported=is_label_supported)
self._check_is_supported(label=result_label, is_label_supported=is_label_supported)

row = [opin.SourceValue,
opin.TargetValue,
None if etalon_label is None else label_to_str(etalon_label),
None if result_label is None else label_to_str(result_label),
self.__cmp_result(l1=etalon_label, l2=result_label)]

rows.append(row)
# Obtaining comparison rows.
rows = self.__calc.calc_diff(etalon=etalon_data, test=test_data, is_label_supported=is_label_supported)

# Filling dataframe.
cmp_table = DocumentCompareTable.create_template_df(rows_count=len(rows))
@@ -131,8 +50,8 @@ def evaluate(self, cmp_pairs):
# Providing compared pairs in a form of tables.
for cmp_pair in cmp_pairs:
assert(isinstance(cmp_pair, OpinionCollectionsToCompare))
cmp_table = self._calc_diff(etalon_opins=cmp_pair.EtalonOpinionCollection,
test_opins=cmp_pair.TestOpinionCollection,
cmp_table = self._calc_diff(etalon_data=cmp_pair.EtalonOpinionCollection,
test_data=cmp_pair.TestOpinionCollection,
is_label_supported=result.is_label_supported)

result.reg_doc(cmp_pair=cmp_pair, cmp_table=cmp_table)
8 changes: 8 additions & 0 deletions arekit/common/evaluation/evaluators/utils.py
Original file line number Diff line number Diff line change
@@ -4,3 +4,11 @@
def label_to_str(label):
    """ Provides the string form of the label, as given by its `to_class_str()`.
    """
    assert isinstance(label, Label)
    class_str = label.to_class_str()
    return class_str


def check_is_supported(label, is_label_supported):
    """ Verifies that the `label` is accepted by the `is_label_supported` predicate.

        label: label to verify; None means there is nothing to check.
        is_label_supported: callable which receives the label and returns
            a truthy value when the label is supported.

        Returns True whenever the check passes (including the None case).
        Raises Exception when the predicate rejects the label.
    """
    if label is None:
        return True

    if not is_label_supported(label):
        raise Exception("Label \"{label}\" is not supported in evaluator!".format(label=label_to_str(label)))

    # Report success explicitly, so that every passing path returns the same
    # value instead of falling through with an implicit None.
    return True
7 changes: 4 additions & 3 deletions arekit/contrib/utils/evaluation/evaluators/three_class.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from arekit.common.evaluation.evaluators.base import BaseEvaluator
from arekit.common.evaluation.evaluators.modes import EvaluationModes
from arekit.common.evaluation.evaluators.utils import check_is_supported
from arekit.common.opinions.collection import OpinionCollection
from arekit.contrib.utils.evaluation.results.three_class import ThreeClassEvalResult

@@ -27,13 +28,13 @@ def _calc_diff(self, etalon_opins, test_opins, is_label_supported):
# We keep only those opinions that were not
# presented in test and has neutral label

self._check_is_supported(label=opinion.Sentiment, is_label_supported=is_label_supported)
check_is_supported(label=opinion.Sentiment, is_label_supported=is_label_supported)

if not test_opins_expanded.has_synonymous_opinion(opinion) and opinion.Sentiment == neut_label:
test_opins_expanded.add_opinion(opinion)

return super(ThreeClassEvaluator, self)._calc_diff(etalon_opins=etalon_opins,
test_opins=test_opins_expanded,
return super(ThreeClassEvaluator, self)._calc_diff(etalon_data=etalon_opins,
test_data=test_opins_expanded,
is_label_supported=is_label_supported)

def _create_eval_result(self):

0 comments on commit 17d5b31

Please sign in to comment.