Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(QueryContext): validation does not validate query_context metrics #19753

Draft
wants to merge 21 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ef1fef6
add database property to query_context to be used by query_context_va…
ofekisr Apr 10, 2022
92cd6af
add datasource property to query_object to be used by query_context_v…
ofekisr Apr 10, 2022
b836667
add query_context_validator unit
ofekisr Apr 11, 2022
b7900e1
refactor(security_manager): extract to method to prevent code reusing
ofekisr Apr 12, 2022
3f92bbd
add new class methods to fetch datasets by sql components
ofekisr Apr 12, 2022
071ccd4
refactor(query_context): decouple query_context from processor
ofekisr Apr 12, 2022
01e4f02
add query object generator method
ofekisr Apr 12, 2022
aa45c1e
add query_context_access_validators
ofekisr Apr 13, 2022
58e64c7
add query_context validator as old behavior
ofekisr Apr 13, 2022
d0bdfa9
add validator factory to be used by command factory (next-commit)
ofekisr Apr 18, 2022
fc10245
add command factory to be used by api.py
ofekisr Apr 18, 2022
186a4b6
add new single_column_example table for tests setups
ofekisr Apr 18, 2022
b810a33
add new feature flag for indicating of using new query context valida…
ofekisr Apr 18, 2022
ae99fe7
add new query_context_validation to the chart data flows
ofekisr Apr 18, 2022
119a9ae
fix lint
ofekisr Apr 18, 2022
e278ce6
fix failed tests on postgress
ofekisr Apr 18, 2022
fd7eb45
fix failed tests
ofekisr Apr 18, 2022
7ee4698
fix failed tests
ofekisr Apr 19, 2022
a9b9291
fix failed tests
ofekisr Apr 20, 2022
6ba9a35
fix failed tests
ofekisr Apr 20, 2022
19f8874
add skip test if backend is presto/hive
ofekisr Apr 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions superset/charts/data/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,16 @@
from superset.views.base import CsvResponse, generate_download_headers
from superset.views.base_api import statsd_metrics

from .commands.command_factory import GetChartDataCommandFactory

if TYPE_CHECKING:
from superset.common.query_context import QueryContext

logger = logging.getLogger(__name__)


class ChartDataRestApi(ChartRestApi):

include_route_methods = {"get_data", "data", "data_from_cache"}

@expose("/<int:pk>/data/", methods=["GET"])
Expand Down Expand Up @@ -133,7 +136,7 @@

try:
query_context = self._create_query_context_from_form(json_body)
command = ChartDataCommand(query_context)
command = GetChartDataCommandFactory.make(query_context)
command.validate()
except QueryObjectValidationError as error:
return self.response_400(message=error.message)
Expand All @@ -150,15 +153,17 @@
and query_context.result_format == ChartDataResultFormat.JSON
and query_context.result_type == ChartDataResultType.FULL
):
return self._run_async(json_body, command)
return self._run_async(json_body, command) # type: ignore

Check warning on line 156 in superset/charts/data/api.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/api.py#L156

Added line #L156 was not covered by tests

try:
form_data = json.loads(chart.params)
except (TypeError, json.decoder.JSONDecodeError):
form_data = {}

return self._get_data_response(
command=command, form_data=form_data, datasource=query_context.datasource
command=command, # type: ignore
form_data=form_data,
datasource=query_context.datasource,
)

@expose("/data", methods=["POST"])
Expand Down Expand Up @@ -221,7 +226,7 @@

try:
query_context = self._create_query_context_from_form(json_body)
command = ChartDataCommand(query_context)
command = GetChartDataCommandFactory.make(query_context)
command.validate()
except QueryObjectValidationError as error:
return self.response_400(message=error.message)
Expand All @@ -238,11 +243,13 @@
and query_context.result_format == ChartDataResultFormat.JSON
and query_context.result_type == ChartDataResultType.FULL
):
return self._run_async(json_body, command)
return self._run_async(json_body, command) # type: ignore

form_data = json_body.get("form_data")
return self._get_data_response(
command, form_data=form_data, datasource=query_context.datasource
command, # type: ignore
form_data=form_data,
datasource=query_context.datasource,
)

@expose("/data/<cache_key>", methods=["GET"])
Expand Down Expand Up @@ -288,7 +295,7 @@
try:
cached_data = self._load_query_context_form_from_cache(cache_key)
query_context = self._create_query_context_from_form(cached_data)
command = ChartDataCommand(query_context)
command = GetChartDataCommandFactory.make(query_context)
command.validate()
except ChartDataCacheLoadError:
return self.response_404()
Expand All @@ -297,7 +304,7 @@
message=_("Request is incorrect: %(error)s", error=error.messages)
)

return self._get_data_response(command, True)
return self._get_data_response(command, True) # type: ignore

def _run_async(
self, form_data: Dict[str, Any], command: ChartDataCommand
Expand Down
55 changes: 55 additions & 0 deletions superset/charts/data/commands/command_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import ClassVar, TYPE_CHECKING

from .get_data_command import ChartDataCommand

if TYPE_CHECKING:
from superset.commands.base import BaseCommand
from superset.common.query_context import QueryContext

Check warning on line 25 in superset/charts/data/commands/command_factory.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/commands/command_factory.py#L24-L25

Added lines #L24 - L25 were not covered by tests

from ..query_context_validators.validaor_factory import QueryContextValidatorFactory

Check warning on line 27 in superset/charts/data/commands/command_factory.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/commands/command_factory.py#L27

Added line #L27 was not covered by tests
Copy link
Member

@dpgaspar dpgaspar May 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We avoid using relative imports
there's seems to be a typo also: validaor_factory



class GetChartDataCommandFactory:
_instance: ClassVar[GetChartDataCommandFactory]

_validator_factory: QueryContextValidatorFactory

@classmethod
def init(cls, validator_factory: QueryContextValidatorFactory) -> None:
cls._instance = GetChartDataCommandFactory(validator_factory)

def __init__(self, validator_factory: QueryContextValidatorFactory):
self._validator_factory = validator_factory

@classmethod
def make(cls, query_context: QueryContext) -> BaseCommand:
if cls._instance is None:
raise RuntimeError("GetChartDataCommandFactory was not initialized")

Check warning on line 45 in superset/charts/data/commands/command_factory.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/commands/command_factory.py#L45

Added line #L45 was not covered by tests
Comment on lines +35 to +45
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to make so many constructors?

return cls._instance._make(query_context)

def _make(self, query_context: QueryContext) -> BaseCommand:
validator = self._validator_factory.make(self._is_use_sql_db(query_context))
return ChartDataCommand(query_context, validator)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if this factory abstraction is necessary for the validation or if this is something we can just put into the command?


def _is_use_sql_db( # pylint: disable=no-self-use
self, query_context: QueryContext
) -> bool:
return query_context.datasource.type == "table"
13 changes: 10 additions & 3 deletions superset/charts/data/commands/get_data_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import logging
from typing import Any, Dict
from typing import Any, Dict, TYPE_CHECKING

from superset.charts.commands.exceptions import (
ChartDataCacheLoadError,
Expand All @@ -27,12 +29,17 @@

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from ..query_context_validators import QueryContextValidator

Check warning on line 33 in superset/charts/data/commands/get_data_command.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/commands/get_data_command.py#L33

Added line #L33 was not covered by tests


class ChartDataCommand(BaseCommand):
_query_context: QueryContext
_validator: QueryContextValidator

def __init__(self, query_context: QueryContext):
def __init__(self, query_context: QueryContext, validator: QueryContextValidator):
self._query_context = query_context
self._validator = validator

def run(self, **kwargs: Any) -> Dict[str, Any]:
# caching is handled in query_context.get_df_payload
Expand Down Expand Up @@ -61,4 +68,4 @@
return return_value

def validate(self) -> None:
self._query_context.raise_for_access()
self._validator.validate(self._query_context)
29 changes: 29 additions & 0 deletions superset/charts/data/query_context_validators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from superset.common.query_context import QueryContext

Check warning on line 23 in superset/charts/data/query_context_validators/__init__.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/query_context_validators/__init__.py#L23

Added line #L23 was not covered by tests


class QueryContextValidator(ABC): # pylint: disable=too-few-public-methods
@abstractmethod
def validate(self, query_context: QueryContext) -> None:
...

Check warning on line 29 in superset/charts/data/query_context_validators/__init__.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/query_context_validators/__init__.py#L29

Added line #L29 was not covered by tests
169 changes: 169 additions & 0 deletions superset/charts/data/query_context_validators/access_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from abc import ABC
from typing import List, Optional, Set, TYPE_CHECKING

from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.sql_parse import ParsedQuery, Table

from . import QueryContextValidator

if TYPE_CHECKING:
from superset import SupersetSecurityManager
from superset.common.query_context import QueryContext
from superset.common.query_object import QueryObject
from superset.connectors.base.models import BaseDatasource
from superset.connectors.sqla.models import SqlaTable
from superset.datasets.dao import DatasetDAO
from superset.models.core import Database
from superset.superset_typing import Metric

Check warning on line 36 in superset/charts/data/query_context_validators/access_validators.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/query_context_validators/access_validators.py#L29-L36

Added lines #L29 - L36 were not covered by tests


# pylint: disable=too-few-public-methods
class QueryContextAccessValidator(QueryContextValidator, ABC):
_security_manager: SupersetSecurityManager

def __init__(self, security_manager: SupersetSecurityManager):
self._security_manager = security_manager


# pylint: disable=too-few-public-methods
class SecurityManagerWrapper(QueryContextAccessValidator):
def validate(self, query_context: QueryContext) -> None:
self._security_manager.raise_for_access(query_context=query_context)

Check warning on line 50 in superset/charts/data/query_context_validators/access_validators.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/query_context_validators/access_validators.py#L50

Added line #L50 was not covered by tests


# pylint: disable=too-few-public-methods
class SqlDatabaseBasedAccessValidator(QueryContextAccessValidator):
_dataset_dao: DatasetDAO

def __init__(
self, security_manager: SupersetSecurityManager, dataset_dao: DatasetDAO
):
super().__init__(security_manager)
self._dataset_dao = dataset_dao

def validate(self, query_context: QueryContext) -> None:
sql_database = query_context.get_database()
if not self._actor_can_access_all_database_data(sql_database): # type: ignore
self._validate_actor_can_access_datasources_context(
query_context, sql_database # type: ignore
)

# pylint: disable=invalid-name
def _actor_can_access_all_database_data(self, sql_database: Database) -> bool:
return (
self._security_manager.can_access_all_databases()
or self._security_manager.can_access_all_datasources()
or self._security_manager.can_access_database(sql_database)
)

# pylint: disable=invalid-name
def _validate_actor_can_access_datasources_context(
self, query_context: QueryContext, sql_database: Database
) -> None:
datasources = self._get_all_related_datasources(query_context, sql_database)
for datasource in datasources:
self._security_manager.raise_for_datasource_access(datasource)

def _get_all_related_datasources(
self, query_context: QueryContext, sql_database: Database
) -> Set[BaseDatasource]:

datasources: Set[BaseDatasource] = set()
datasources.add(query_context.datasource)
datasources.update(
self._collect_datasources_from_queries(query_context.queries, sql_database)
)
return datasources

# pylint: disable=invalid-name
def _collect_datasources_from_queries(
self, queries: List[QueryObject], sql_db: Database
) -> Set[BaseDatasource]:
datasources: Set[BaseDatasource] = set()
for query in queries:
datasources.update(self._collect_datasources_from_query(query, sql_db))
return datasources

def _collect_datasources_from_query(
self, query: QueryObject, database: Database
) -> Set[BaseDatasource]:

datasources: Set[BaseDatasource] = set()
datasource = query.get_datasource()
if datasource is not None:
datasources.add(query.datasource) # type: ignore
if query.metrics:
datasources.update(
self._collect_datasources_from_metrics(query.metrics, database)
)
return datasources

# pylint: disable=invalid-name
def _collect_datasources_from_metrics(
self, metrics: List[Metric], database: Database
) -> Set[BaseDatasource]:
datasources: Set[BaseDatasource] = set()
for metric in metrics:
datasources.update(self._find_datasources_in_metric(metric, database))
return datasources

def _find_datasources_in_metric(
self, metric: Metric, database: Database
) -> Set[BaseDatasource]:
sql_expression = self._get_sql_expression_from_metric(metric)
if sql_expression:
return self._determine_datasources(sql_expression, database)
return set()

# pylint: disable=invalid-name
@staticmethod
def _get_sql_expression_from_metric(metric: Metric) -> Optional[str]:
if isinstance(metric, dict) and "sqlExpression" in metric:
return metric["sqlExpression"]
return None

def _determine_datasources(
self, sqlExpression: str, database: Database
) -> Set[BaseDatasource]:
datasources = set()
for table in ParsedQuery(sqlExpression).tables:
datasources.update(self._get_datasources_from_table(database, table))
return datasources

def _get_datasources_from_table(
self, database: Database, table: Table
) -> List[SqlaTable]:
table_datasources = self._dataset_dao.get_by_sql_database_components(
database, table.table, table.schema
)
if len(table_datasources) == 0:
raise SupersetSecurityException(

Check warning on line 159 in superset/charts/data/query_context_validators/access_validators.py

View check run for this annotation

Codecov / codecov/patch

superset/charts/data/query_context_validators/access_validators.py#L159

Added line #L159 was not covered by tests
SupersetError(
"datasources not "
"found, thus the "
"table is internal db "
"table ",
SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
ErrorLevel.ERROR,
)
)
return table_datasources
Loading