#352 generalization-related refactoring. Considering calc_diff as a separate class of BaseComparisonCalculator type.
nicolay-r · Jun 30, 2022 · 17d5b31 · 17d5b31
1 parent 57b6780
commit 17d5b31
Showing 6 changed files with 118 additions and 91 deletions.
diff --git a/arekit/common/evaluation/calc/__init__.py b/arekit/common/evaluation/calc/__init__.py
diff --git a/arekit/common/evaluation/calc/base.py b/arekit/common/evaluation/calc/base.py
@@ -0,0 +1,4 @@
+class BaseComparisonCalculator(object):
+    """ Interface of comparison calculators: subclasses implement calc_diff(etalon, test, is_label_supported). """
+    def calc_diff(self, etalon, test, is_label_supported):
+        raise NotImplementedError()
diff --git a/arekit/common/evaluation/calc/opinions.py b/arekit/common/evaluation/calc/opinions.py
@@ -0,0 +1,95 @@
+from arekit.common.evaluation.calc.base import BaseComparisonCalculator
+from arekit.common.evaluation.evaluators.modes import EvaluationModes
+from arekit.common.evaluation.evaluators.utils import label_to_str, check_is_supported
+from arekit.common.labels.base import Label
+from arekit.common.opinions.base import Opinion
+from arekit.common.opinions.collection import OpinionCollection
+
+
+class OpinionsComparisonCalculator(BaseComparisonCalculator):
+    """ Compares a pair of OpinionCollections (etalon and test) according to the given evaluation mode.
+    """
+
+    def __init__(self, eval_mode):
+        assert(isinstance(eval_mode, EvaluationModes))
+        self.__eval_mode = eval_mode
+
+    # region private methods
+
+    @staticmethod
+    def __cmp_result(l1, l2):  # True only when both labels are present and equal
+        assert (isinstance(l1, Label) or l1 is None)
+        assert (isinstance(l2, Label) or l2 is None)
+
+        if l1 is None or l2 is None:  # a missing label never counts as a match
+            return False
+
+        return l1 == l2
+
+    def __iter_diff_core(self, etalon_opins, test_opins):  # yields [opinion, etalon label or None, test label or None]
+        assert (isinstance(etalon_opins, OpinionCollection))
+        assert (isinstance(test_opins, OpinionCollection))
+
+        for o_etalon in etalon_opins:  # pass 1: every etalon opinion
+            assert (isinstance(o_etalon, Opinion))
+
+            o_test = test_opins.try_get_synonyms_opinion(o_etalon)
+            has_opinion = o_test is not None
+
+            if self.__eval_mode == EvaluationModes.Classification:
+                # In case of classification mode, we skip such cases
+                # when the etalon opinion was not found in the test collection.
+                if not has_opinion:
+                    continue
+                # Otherwise provide the information for further comparison.
+                yield [o_etalon, o_etalon.Sentiment, o_test.Sentiment]
+            elif self.__eval_mode == EvaluationModes.Extraction:
+                yield [o_etalon,
+                       o_etalon.Sentiment,
+                       None if not has_opinion else o_test.Sentiment]
+
+        for o_test in test_opins:  # pass 2: test opinions that have no etalon counterpart
+            assert (isinstance(o_test, Opinion))
+            has_opinion = etalon_opins.has_synonymous_opinion(o_test)
+
+            if has_opinion:
+                # This case was covered by the prior loop.
+                continue
+
+            if self.__eval_mode == EvaluationModes.Classification:
+                # This should not be possible, since we perform
+                # classification of the already provided (etalon) opinions.
+                raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
+                                "found in etalon collection!".format(s=o_test.SourceValue,
+                                                                     t=o_test.TargetValue))
+            elif self.__eval_mode == EvaluationModes.Extraction:
+                yield [o_test, None, o_test.Sentiment]
+
+    # endregion
+
+    def calc_diff(self, etalon, test, is_label_supported):
+        """ Compare etalon and test OpinionCollections; every label is passed through check_is_supported.
+            Returns list of rows: [source, target, etalon label str or None, test label str or None, is match] """
+        assert(isinstance(etalon, OpinionCollection))
+        assert(isinstance(test, OpinionCollection))
+        assert (callable(is_label_supported))
+
+        it = self.__iter_diff_core(etalon_opins=etalon, test_opins=test)
+
+        # Cache all rows into `rows` array
+        rows = []
+        for args in it:
+            opin, etalon_label, result_label = args
+
+            check_is_supported(label=etalon_label, is_label_supported=is_label_supported)
+            check_is_supported(label=result_label, is_label_supported=is_label_supported)
+
+            row = [opin.SourceValue,
+                   opin.TargetValue,
+                   None if etalon_label is None else label_to_str(etalon_label),
+                   None if result_label is None else label_to_str(result_label),
+                   self.__cmp_result(l1=etalon_label, l2=result_label)]
+
+            rows.append(row)
+
+        return rows
diff --git a/arekit/common/evaluation/evaluators/base.py b/arekit/common/evaluation/evaluators/base.py
@@ -1,73 +1,17 @@
 import collections
 
+from arekit.common.evaluation.calc.opinions import OpinionsComparisonCalculator
 from arekit.common.evaluation.cmp_opinions import OpinionCollectionsToCompare
 from arekit.common.evaluation.evaluators.cmp_table import DocumentCompareTable
 from arekit.common.evaluation.evaluators.modes import EvaluationModes
-from arekit.common.evaluation.evaluators.utils import label_to_str
 from arekit.common.evaluation.results.base import BaseEvalResult
-from arekit.common.labels.base import Label
-from arekit.common.opinions.base import Opinion
-from arekit.common.opinions.collection import OpinionCollection
 
 
 class BaseEvaluator(object):
 
     def __init__(self, eval_mode):
         assert(isinstance(eval_mode, EvaluationModes))
-        self.__eval_mode = eval_mode
-
-    # region private methods
-
-    @staticmethod
-    def __cmp_result(l1, l2):
-        assert(isinstance(l1, Label) or l1 is None)
-        assert(isinstance(l2, Label) or l2 is None)
-
-        if l1 is None or l2 is None:
-            return False
-
-        return l1 == l2
-
-    def __iter_diff_core(self, etalon_opins, test_opins):
-        assert(isinstance(etalon_opins, OpinionCollection))
-        assert(isinstance(test_opins, OpinionCollection))
-
-        for o_etalon in etalon_opins:
-            assert(isinstance(o_etalon, Opinion))
-
-            o_test = test_opins.try_get_synonyms_opinion(o_etalon)
-            has_opinion = o_test is not None
-
-            if self.__eval_mode == EvaluationModes.Classification:
-                # In case of evaluation mode, we do not consider such
-                # cases when etalon opinion was not found in result.
-                if not has_opinion:
-                    continue
-                # Otherwise provide the information for further comparison.
-                yield [o_etalon, o_etalon.Sentiment, o_test.Sentiment]
-            elif self.__eval_mode == EvaluationModes.Extraction:
-                yield [o_etalon,
-                       o_etalon.Sentiment,
-                       None if not has_opinion else o_test.Sentiment]
-
-        for o_test in test_opins:
-            assert(isinstance(o_test, Opinion))
-            has_opinion = etalon_opins.has_synonymous_opinion(o_test)
-
-            if has_opinion:
-                # This case was covered by the prior loop.
-                continue
-
-            if self.__eval_mode == EvaluationModes.Classification:
-                # That could not be possible, since we perform
-                # classification of already provided opinions.
-                raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
-                                "found in etalon collection!".format(s=o_test.SourceValue,
-                                                                      t=o_test.TargetValue))
-            elif self.__eval_mode == EvaluationModes.Extraction:
-                yield [o_test, None, o_test.Sentiment]
-
-    # endregion
+        self.__calc = OpinionsComparisonCalculator(eval_mode)
 
     # region abstract methods
 
@@ -81,36 +25,11 @@ def _create_eval_result(self):
 
     # region protected methods
 
-    def _check_is_supported(self, label, is_label_supported):
-        if label is None:
-            return True
-
-        if not is_label_supported(label):
-            raise Exception("Label \"{label}\" is not supported by {e}".format(
-                label=label_to_str(label),
-                e=type(self).__name__))
-
-    def _calc_diff(self, etalon_opins, test_opins, is_label_supported):
+    def _calc_diff(self, etalon_data, test_data, is_label_supported):
         assert(callable(is_label_supported))
 
-        it = self.__iter_diff_core(etalon_opins=etalon_opins,
-                                   test_opins=test_opins)
-
-        # Cache all rows into `rows` array
-        rows = []
-        for args in it:
-            opin, etalon_label, result_label = args
-
-            self._check_is_supported(label=etalon_label, is_label_supported=is_label_supported)
-            self._check_is_supported(label=result_label, is_label_supported=is_label_supported)
-
-            row = [opin.SourceValue,
-                   opin.TargetValue,
-                   None if etalon_label is None else label_to_str(etalon_label),
-                   None if result_label is None else label_to_str(result_label),
-                   self.__cmp_result(l1=etalon_label, l2=result_label)]
-
-            rows.append(row)
+        # Obtaining comparison rows.
+        rows = self.__calc.calc_diff(etalon=etalon_data, test=test_data, is_label_supported=is_label_supported)
 
         # Filling dataframe.
         cmp_table = DocumentCompareTable.create_template_df(rows_count=len(rows))
@@ -131,8 +50,8 @@ def evaluate(self, cmp_pairs):
         # Providing compared pairs in a form of tables.
         for cmp_pair in cmp_pairs:
             assert(isinstance(cmp_pair, OpinionCollectionsToCompare))
-            cmp_table = self._calc_diff(etalon_opins=cmp_pair.EtalonOpinionCollection,
-                                        test_opins=cmp_pair.TestOpinionCollection,
+            cmp_table = self._calc_diff(etalon_data=cmp_pair.EtalonOpinionCollection,
+                                        test_data=cmp_pair.TestOpinionCollection,
                                         is_label_supported=result.is_label_supported)
 
             result.reg_doc(cmp_pair=cmp_pair, cmp_table=cmp_table)

diff --git a/arekit/common/evaluation/evaluators/utils.py b/arekit/common/evaluation/evaluators/utils.py
@@ -4,3 +4,11 @@
 def label_to_str(label):
     assert(isinstance(label, Label))
     return label.to_class_str()
+
+
+def check_is_supported(label, is_label_supported):
+    """ Returns True for a None or supported label; raises Exception when is_label_supported(label) is falsy. """
+    if label is not None and not is_label_supported(label):
+        raise Exception("Label \"{label}\" is not supported in evaluator!".format(label=label_to_str(label)))
+
+    return True
diff --git a/arekit/contrib/utils/evaluation/evaluators/three_class.py b/arekit/contrib/utils/evaluation/evaluators/three_class.py
@@ -1,5 +1,6 @@
 from arekit.common.evaluation.evaluators.base import BaseEvaluator
 from arekit.common.evaluation.evaluators.modes import EvaluationModes
+from arekit.common.evaluation.evaluators.utils import check_is_supported
 from arekit.common.opinions.collection import OpinionCollection
 from arekit.contrib.utils.evaluation.results.three_class import ThreeClassEvalResult
 
@@ -27,13 +28,13 @@ def _calc_diff(self, etalon_opins, test_opins, is_label_supported):
             # We keep only those opinions that were not
             # presented in test and has neutral label
 
-            self._check_is_supported(label=opinion.Sentiment, is_label_supported=is_label_supported)
+            check_is_supported(label=opinion.Sentiment, is_label_supported=is_label_supported)
 
             if not test_opins_expanded.has_synonymous_opinion(opinion) and opinion.Sentiment == neut_label:
                 test_opins_expanded.add_opinion(opinion)
 
-        return super(ThreeClassEvaluator, self)._calc_diff(etalon_opins=etalon_opins,
-                                                           test_opins=test_opins_expanded,
+        return super(ThreeClassEvaluator, self)._calc_diff(etalon_data=etalon_opins,
+                                                           test_data=test_opins_expanded,
                                                            is_label_supported=is_label_supported)
 
     def _create_eval_result(self):