From 0a89166251838d47edb72cbd3c749849de0bb040 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 17 Feb 2022 18:12:24 +0800 Subject: [PATCH 1/3] fix: contribution operator meets nan value --- superset/common/query_object.py | 2 ++ .../pandas_postprocessing/contribution.py | 4 +++ .../test_contribution.py | 36 ++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 03ee9cb1d3059..2a40155d1ca4f 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -19,6 +19,7 @@ import logging from datetime import datetime, timedelta +from pprint import pformat from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING from flask_babel import gettext as _ @@ -395,6 +396,7 @@ def exec_post_processing(self, df: DataFrame) -> DataFrame: :raises QueryObjectValidationError: If the post processing operation is incorrect """ + logger.debug("post_processing: %s", pformat(self.post_processing)) for post_process in self.post_processing: operation = post_process.get("operation") if not operation: diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py index d813961fb5f55..f6171d6fef13c 100644 --- a/superset/utils/pandas_postprocessing/contribution.py +++ b/superset/utils/pandas_postprocessing/contribution.py @@ -17,6 +17,7 @@ from decimal import Decimal from typing import List, Optional +import numpy as np from flask_babel import gettext as _ from pandas import DataFrame @@ -49,6 +50,7 @@ def contribution( """ contribution_df = df.copy() numeric_df = contribution_df.select_dtypes(include=["number", Decimal]) + numeric_df.fillna(0, inplace=True) # verify column selections if columns: numeric_columns = numeric_df.columns.tolist() @@ -71,5 +73,7 @@ def contribution( numeric_df = numeric_df[columns] axis = 0 if orientation == PostProcessingContributionOrientation.COLUMN else 1 numeric_df = numeric_df / numeric_df.values.sum(axis=axis, keepdims=True) + # replace infinity and nan with 0 in dataframe + numeric_df.replace(to_replace=[np.Inf, -np.Inf, np.nan], value=0, inplace=True) contribution_df[rename_columns] = numeric_df return contribution_df diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py index 78212cbe5b851..dfedde676eb36 100644 --- a/tests/unit_tests/pandas_postprocessing/test_contribution.py +++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py @@ -18,6 +18,8 @@ from datetime import datetime import pytest +from numpy import nan +from numpy.testing import assert_array_equal from pandas import DataFrame from superset.exceptions import QueryObjectValidationError @@ -28,9 +30,14 @@ def test_contribution(): df = DataFrame( { - DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),], - "a": [1, 3], - "b": [1, 9], + DTTM_ALIAS: [ + datetime(2020, 7, 16, 14, 49), + datetime(2020, 7, 16, 14, 50), + datetime(2020, 7, 16, 14, 51), + ], + "a": [1, 3, nan], + "b": [1, 9, nan], + "c": [nan, nan, nan], } ) with pytest.raises(QueryObjectValidationError, match="not numeric"): @@ -43,18 +50,20 @@ def test_contribution(): processed_df = contribution( df, orientation=PostProcessingContributionOrientation.ROW, ) - assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"] - assert processed_df["a"].tolist() == [0.5, 0.25] - assert processed_df["b"].tolist() == [0.5, 0.75] + assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"] + assert processed_df["a"].tolist() == [0.5, 0.25, 0] + assert processed_df["b"].tolist() == [0.5, 0.75, 0] + assert processed_df["c"].tolist() == [0, 0, 0] # cell contribution across column without temporal column df.pop(DTTM_ALIAS) processed_df = contribution( df, orientation=PostProcessingContributionOrientation.COLUMN ) - assert processed_df.columns.tolist() == ["a", "b"] - assert processed_df["a"].tolist() == [0.25, 0.75] - assert processed_df["b"].tolist() == [0.1, 0.9] + assert processed_df.columns.tolist() == ["a", "b", "c"] + assert processed_df["a"].tolist() == [0.25, 0.75, 0] + assert processed_df["b"].tolist() == [0.1, 0.9, 0] + assert processed_df["c"].tolist() == [0, 0, 0] # contribution only on selected columns processed_df = contribution( @@ -63,7 +72,8 @@ def test_contribution(): columns=["a"], rename_columns=["pct_a"], ) - assert processed_df.columns.tolist() == ["a", "b", "pct_a"] - assert processed_df["a"].tolist() == [1, 3] - assert processed_df["b"].tolist() == [1, 9] - assert processed_df["pct_a"].tolist() == [0.25, 0.75] + assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"] + assert_array_equal(processed_df["a"].tolist(), [1, 3, nan]) + assert_array_equal(processed_df["b"].tolist(), [1, 9, nan]) + assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan]) + assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0] From 320790d2cc79acd87c0702d14d6dac4eaba6a8e9 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Fri, 18 Feb 2022 22:05:36 +0800 Subject: [PATCH 2/3] fix ut --- superset/utils/pandas_postprocessing/contribution.py | 3 --- .../pandas_postprocessing/test_contribution.py | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py index f6171d6fef13c..7097ea33355af 100644 --- a/superset/utils/pandas_postprocessing/contribution.py +++ b/superset/utils/pandas_postprocessing/contribution.py @@ -17,7 +17,6 @@ from decimal import Decimal from typing import List, Optional -import numpy as np from flask_babel import gettext as _ from pandas import DataFrame @@ -73,7 +72,5 @@ def contribution( numeric_df = numeric_df[columns] axis = 0 if orientation == PostProcessingContributionOrientation.COLUMN else 1 numeric_df = numeric_df / numeric_df.values.sum(axis=axis, keepdims=True) - # replace infinity and nan with 0 in dataframe - numeric_df.replace(to_replace=[np.Inf, -np.Inf, np.nan], value=0, inplace=True) contribution_df[rename_columns] = numeric_df return contribution_df diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py index dfedde676eb36..9d2df76971116 100644 --- a/tests/unit_tests/pandas_postprocessing/test_contribution.py +++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py @@ -51,9 +51,9 @@ def test_contribution(): df, orientation=PostProcessingContributionOrientation.ROW, ) assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"] - assert processed_df["a"].tolist() == [0.5, 0.25, 0] - assert processed_df["b"].tolist() == [0.5, 0.75, 0] - assert processed_df["c"].tolist() == [0, 0, 0] + assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan]) + assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan]) + assert_array_equal(processed_df["c"].tolist(), [0, 0, nan]) # cell contribution across column without temporal column df.pop(DTTM_ALIAS) @@ -61,9 +61,9 @@ def test_contribution(): df, orientation=PostProcessingContributionOrientation.COLUMN ) assert processed_df.columns.tolist() == ["a", "b", "c"] - assert processed_df["a"].tolist() == [0.25, 0.75, 0] - assert processed_df["b"].tolist() == [0.1, 0.9, 0] - assert processed_df["c"].tolist() == [0, 0, 0] + assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0]) + assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0]) + assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan]) # contribution only on selected columns processed_df = contribution( From f8e8f73fea62c9b776353d682f7181bc0b9d2592 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Fri, 18 Feb 2022 22:12:13 +0800 Subject: [PATCH 3/3] rename FE --- .../plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx | 4 ++-- .../src/Timeseries/Regular/Bar/controlPanel.tsx | 4 ++-- .../src/Timeseries/Regular/controlPanel.tsx | 4 ++-- .../plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx | 4 ++-- .../plugin-chart-echarts/src/Timeseries/controlPanel.tsx | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx index 720eb9428723d..bae735b692bad 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx @@ -72,10 +72,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx index 96cb0b6bda60d..08d09a0147272 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx @@ -69,10 +69,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx index 14edc341f2bab..701c3a57ff561 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx @@ -66,10 +66,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx index 8178e4929782a..8b40330730af5 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx @@ -72,10 +72,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ], diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx index 1012b2f6e7df2..f7fd56b7fa577 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx @@ -73,10 +73,10 @@ const config: ControlPanelConfig = { default: contributionMode, choices: [ [null, 'None'], - [EchartsTimeseriesContributionType.Row, 'Total'], + [EchartsTimeseriesContributionType.Row, 'Row'], [EchartsTimeseriesContributionType.Column, 'Series'], ], - description: t('Calculate contribution per series or total'), + description: t('Calculate contribution per series or row'), }, }, ],