From 03535f1a778f4ed4bc5b99ecc2d20ad922ac1ee4 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 27 Jan 2022 18:36:13 +0800 Subject: [PATCH] fix: null value and empty string in filter --- .../index.tsx | 7 ++- superset/connectors/base/models.py | 8 +-- superset/connectors/druid/models.py | 4 +- superset/connectors/sqla/models.py | 11 +--- superset/constants.py | 1 + superset/views/core.py | 4 +- tests/integration_tests/sqla_models_tests.py | 56 +++++++++++++++---- 7 files changed, 60 insertions(+), 31 deletions(-) diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx index 42baf74bdd1cf..192c5eb3cba5c 100644 --- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx +++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx @@ -38,6 +38,7 @@ import AdhocFilter, { } from 'src/explore/components/controls/FilterControl/AdhocFilter'; import { Input } from 'src/common/components'; import { propertyComparator } from 'src/components/Select/Select'; +import { optionLabel } from 'src/utils/common'; const StyledInput = styled(Input)` margin-bottom: ${({ theme }) => theme.gridUnit * 4}px; @@ -346,7 +347,11 @@ const AdhocFilterEditPopoverSimpleTabContent: React.FC = props => { endpoint: `/superset/filter/${datasource.type}/${datasource.id}/${col}/`, }) .then(({ json }) => { - setSuggestions(json); + setSuggestions( + json.map((item: null | number | boolean | string) => + optionLabel(item), + ), + ); setLoadingComparatorSuggestions(false); }) .catch(() => { diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index f8a3261805e7c..46abdc625ae3a 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -25,7 +25,7 @@ from sqlalchemy.orm import foreign, Query, relationship, RelationshipProperty, Session from superset import is_feature_enabled, security_manager -from superset.constants import NULL_STRING +from superset.constants import EMPTY_STRING, NULL_STRING from superset.datasets.commands.exceptions import DatasetNotFoundError from superset.models.helpers import AuditMixinNullable, ImportExportMixin, QueryResult from superset.models.slice import Slice @@ -404,7 +404,7 @@ def handle_single_value(value: Optional[FilterValue]) -> Optional[FilterValue]: return utils.cast_to_num(value) if value == NULL_STRING: return None - if value == "": + if value == EMPTY_STRING: return "" if target_column_type == utils.GenericDataType.BOOLEAN: return utils.cast_to_boolean(value) @@ -439,9 +439,7 @@ def query(self, query_obj: QueryObjectDict) -> QueryResult: """ raise NotImplementedError() - def values_for_column( - self, column_name: str, limit: int = 10000, contain_null: bool = True, - ) -> List[Any]: + def values_for_column(self, column_name: str, limit: int = 10000,) -> List[Any]: """Given a column, returns an iterable of distinct values This is used to populate the dropdown showing a list of diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index aa86d7ab89383..14df4eaeca4b0 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -948,9 +948,7 @@ def metrics_and_post_aggs( ) return aggs, post_aggs - def values_for_column( - self, column_name: str, limit: int = 10000, contain_null: bool = True, - ) -> List[Any]: + def values_for_column(self, column_name: str, limit: int = 10000,) -> List[Any]: """Retrieve some values for the given column""" logger.info( "Getting values for columns [{}] limited to [{}]".format(column_name, limit) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 22516934db139..49acc69eff0bc 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -176,9 +176,7 @@ def query(self, query_obj: QueryObjectDict) -> QueryResult: def get_query_str(self, query_obj: QueryObjectDict) -> str: raise NotImplementedError() - def values_for_column( - self, column_name: str, limit: int = 10000, contain_null: bool = True, - ) -> List[Any]: + def values_for_column(self, column_name: str, limit: int = 10000,) -> List[Any]: raise NotImplementedError() @@ -738,9 +736,7 @@ def get_fetch_values_predicate(self) -> TextClause: ) ) from ex - def values_for_column( - self, column_name: str, limit: int = 10000, contain_null: bool = True, - ) -> List[Any]: + def values_for_column(self, column_name: str, limit: int = 10000) -> List[Any]: """Runs query against sqla to retrieve some sample values for the given column. """ @@ -756,9 +752,6 @@ def values_for_column( if limit: qry = qry.limit(limit) - if not contain_null: - qry = qry.where(target_col.get_sqla_col().isnot(None)) - if self.fetch_values_predicate: qry = qry.where(self.get_fetch_values_predicate()) diff --git a/superset/constants.py b/superset/constants.py index 1e02bcc0cba71..7cfa72e4b36b4 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -21,6 +21,7 @@ from enum import Enum NULL_STRING = "" +EMPTY_STRING = "" CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET" diff --git a/superset/views/core.py b/superset/views/core.py index ca7dae3ee51e7..984b9205f3af9 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -921,9 +921,7 @@ def filter( # pylint: disable=no-self-use datasource.raise_for_access() row_limit = apply_max_row_limit(config["FILTER_SELECT_ROW_LIMIT"]) payload = json.dumps( - datasource.values_for_column( - column_name=column, limit=row_limit, contain_null=False, - ), + datasource.values_for_column(column_name=column, limit=row_limit), default=utils.json_int_dttm_ser, ignore_nan=True, ) diff --git a/tests/integration_tests/sqla_models_tests.py b/tests/integration_tests/sqla_models_tests.py index eb5450780032a..4961ae073a6ae 100644 --- a/tests/integration_tests/sqla_models_tests.py +++ b/tests/integration_tests/sqla_models_tests.py @@ -30,7 +30,8 @@ from sqlalchemy.sql.elements import TextClause from superset import db -from superset.connectors.sqla.models import SqlaTable, TableColumn +from superset.connectors.sqla.models import SqlaTable, TableColumn, SqlMetric +from superset.constants import EMPTY_STRING, NULL_STRING from superset.db_engine_specs.bigquery import BigQueryEngineSpec from superset.db_engine_specs.druid import DruidEngineSpec from superset.exceptions import QueryObjectValidationError @@ -477,22 +478,57 @@ def test_labels_expected_on_mutated_query(self): def test_values_for_column(self): table = SqlaTable( table_name="test_null_in_column", - sql="SELECT 'foo' as foo UNION SELECT 'bar' UNION SELECT NULL", + sql=( + "SELECT 'foo' as foo " + "UNION SELECT '' " + "UNION SELECT NULL " + "UNION SELECT 'null'" + ), database=get_example_database(), ) TableColumn(column_name="foo", type="VARCHAR(255)", table=table) + SqlMetric(metric_name="count", expression="count(*)", table=table) - with_null = table.values_for_column( - column_name="foo", limit=10000, contain_null=True - ) + # null value, empty string and text should be retrieved + with_null = table.values_for_column(column_name="foo", limit=10000) assert None in with_null - assert len(with_null) == 3 + assert len(with_null) == 4 + + # null value should be replaced + result_object = table.query( + { + "metrics": ["count"], + "filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}], + "is_timeseries": False, + } + ) + assert result_object.df["count"][0] == 1 + + # empty string should be replaced + result_object = table.query( + { + "metrics": ["count"], + "filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}], + "is_timeseries": False, + } + ) + assert result_object.df["count"][0] == 1 - without_null = table.values_for_column( - column_name="foo", limit=10000, contain_null=False + # both replaced + result_object = table.query( + { + "metrics": ["count"], + "filter": [ + { + "col": "foo", + "val": [EMPTY_STRING, NULL_STRING, "null", "foo"], + "op": "IN", + } + ], + "is_timeseries": False, + } ) - assert None not in without_null - assert len(without_null) == 2 + assert result_object.df["count"][0] == 4 @pytest.mark.parametrize(