From 2a43de055007f5f6cd99c8ecb47737fb9a3b8679 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 13:23:38 +0100 Subject: [PATCH 1/8] fix: dataset safe URL for explore_url --- .../src/pages/DatasetList/index.tsx | 31 +++++++++++++++---- superset/config.py | 2 +- superset/datasets/commands/exceptions.py | 17 ---------- superset/datasets/commands/update.py | 8 ----- superset/utils/urls.py | 15 --------- superset/views/base.py | 1 + superset/views/datasource/views.py | 15 --------- tests/unit_tests/utils/urls_tests.py | 24 -------------- 8 files changed, 27 insertions(+), 86 deletions(-) diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index 43913a828029c..42d75f3ecc7d5 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -29,7 +29,7 @@ import React, { useMemo, useCallback, } from 'react'; -import { useHistory } from 'react-router-dom'; +import {Link, useHistory} from 'react-router-dom'; import rison from 'rison'; import { createFetchRelated, @@ -69,6 +69,7 @@ import { CONFIRM_OVERWRITE_MESSAGE, } from 'src/features/datasets/constants'; import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal'; +import { useSelector } from 'react-redux'; const extensionsRegistry = getExtensionsRegistry(); const DatasetDeleteRelatedExtension = extensionsRegistry.get( @@ -123,6 +124,10 @@ type Dataset = { table_name: string; }; +interface DatasetListConfig { + PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET: boolean; +} + interface VirtualDataset extends Dataset { extra: Record; sql: string; @@ -181,6 +186,11 @@ const DatasetList: FunctionComponent = ({ setSSHTunnelPrivateKeyPasswordFields, ] = useState([]); + const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector< + any, + DatasetListConfig + >(state => state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET); + const openDatasetImportModal = () => { showImportModal(true); }; @@ -309,11 +319,20 @@ const DatasetList: FunctionComponent = ({ }, }, }: any) => { - const titleLink = ( - // exploreUrl can be a link to Explore or an external link - // in the first case use SPA routing, else use HTML anchor - {datasetTitle} - ); + let titleLink: JSX.Element; + if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) { + titleLink = ( + + {datasetTitle} + + ); + } else { + titleLink = ( + // exploreUrl can be a link to Explore or an external link + // in the first case use SPA routing, else use HTML anchor + {datasetTitle} + ); + } try { const parsedExtra = JSON.parse(extra); return ( diff --git a/superset/config.py b/superset/config.py index 4a7434093c9b1..8e206d39e87a0 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1445,7 +1445,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument # Typically these should not be allowed. PREVENT_UNSAFE_DB_CONNECTIONS = True -# Prevents unsafe default endpoints to be registered on datasets. +# If true all default urls on datasets will be handled as relative URLs by the frontend PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True # Define a list of allowed URLs for dataset data imports (v1). diff --git a/superset/datasets/commands/exceptions.py b/superset/datasets/commands/exceptions.py index fe9fe94cc61a3..f135294980835 100644 --- a/superset/datasets/commands/exceptions.py +++ b/superset/datasets/commands/exceptions.py @@ -61,23 +61,6 @@ def __init__(self, table_name: str) -> None: ) -class DatasetEndpointUnsafeValidationError(ValidationError): - """ - Marshmallow validation error for unsafe dataset default endpoint - """ - - def __init__(self) -> None: - super().__init__( - [ - _( - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ) - ], - field_name="default_endpoint", - ) - - class DatasetColumnNotFoundValidationError(ValidationError): """ Marshmallow validation error when dataset column for update does not exist diff --git a/superset/datasets/commands/update.py b/superset/datasets/commands/update.py index 1636805567b52..3d716452f486a 100644 --- a/superset/datasets/commands/update.py +++ b/superset/datasets/commands/update.py @@ -32,7 +32,6 @@ DatasetColumnNotFoundValidationError, DatasetColumnsDuplicateValidationError, DatasetColumnsExistsValidationError, - DatasetEndpointUnsafeValidationError, DatasetExistsValidationError, DatasetForbiddenError, DatasetInvalidError, @@ -43,7 +42,6 @@ DatasetUpdateFailedError, ) from superset.exceptions import SupersetSecurityException -from superset.utils.urls import is_safe_url logger = logging.getLogger(__name__) @@ -106,12 +104,6 @@ def validate(self) -> None: exceptions.append(ex) # Validate default URL safety default_endpoint = self._properties.get("default_endpoint") - if ( - default_endpoint - and not is_safe_url(default_endpoint) - and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] - ): - exceptions.append(DatasetEndpointUnsafeValidationError()) # Validate columns if columns := self._properties.get("columns"): diff --git a/superset/utils/urls.py b/superset/utils/urls.py index 31fbd89337af4..dc49bc4b7b19c 100644 --- a/superset/utils/urls.py +++ b/superset/utils/urls.py @@ -52,18 +52,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str: f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items() ) return urllib.parse.urlunsplit(parts) - - -def is_safe_url(url: str) -> bool: - if url.startswith("///"): - return False - try: - ref_url = urlparse(request.host_url) - test_url = urlparse(url) - except ValueError: - return False - if unicodedata.category(url[0])[0] == "C": - return False - if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc: - return False - return True diff --git a/superset/views/base.py b/superset/views/base.py index 8574a7f6d5b50..08258b3cbbfcb 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -120,6 +120,7 @@ "ALERT_REPORTS_DEFAULT_RETENTION", "ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT", "NATIVE_FILTER_DEFAULT_ROW_LIMIT", + "PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET", ) logger = logging.getLogger(__name__) diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py index f1086acd47330..d3512aab74e39 100644 --- a/superset/views/datasource/views.py +++ b/superset/views/datasource/views.py @@ -40,7 +40,6 @@ from superset.models.core import Database from superset.superset_typing import FlaskResponse from superset.utils.core import DatasourceType -from superset.utils.urls import is_safe_url from superset.views.base import ( api, BaseSupersetView, @@ -80,20 +79,6 @@ def save(self) -> FlaskResponse: datasource_id = datasource_dict.get("id") datasource_type = datasource_dict.get("type") database_id = datasource_dict["database"].get("id") - default_endpoint = datasource_dict["default_endpoint"] - if ( - default_endpoint - and not is_safe_url(default_endpoint) - and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] - ): - return json_error_response( - _( - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ), - status=400, - ) - orm_datasource = DatasourceDAO.get_datasource( db.session, DatasourceType(datasource_type), datasource_id ) diff --git a/tests/unit_tests/utils/urls_tests.py b/tests/unit_tests/utils/urls_tests.py index 287f346c3d99d..8ead2dacb4ed0 100644 --- a/tests/unit_tests/utils/urls_tests.py +++ b/tests/unit_tests/utils/urls_tests.py @@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None: def test_convert_dashboard_link_with_integer() -> None: test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0) assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0" - - -@pytest.mark.parametrize( - "url,is_safe", - [ - ("http://localhost/", True), - ("http://localhost/superset/1", True), - ("https://localhost/", False), - ("https://localhost/superset/1", False), - ("localhost/superset/1", False), - ("ftp://localhost/superset/1", False), - ("http://external.com", False), - ("https://external.com", False), - ("external.com", False), - ("///localhost", False), - ("xpto://localhost:[3/1/", False), - ], -) -def test_is_safe_url(url: str, is_safe: bool) -> None: - from superset import app - from superset.utils.urls import is_safe_url - - with app.test_request_context("/"): - assert is_safe_url(url) == is_safe From d9ae149ec33bafeafcf4980490f38743d7f5efaa Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 13:33:31 +0100 Subject: [PATCH 2/8] update updating --- UPDATING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/UPDATING.md b/UPDATING.md index abda7ee5150db..d7589f9076a09 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -41,6 +41,7 @@ assists people when migrating to a new version. ### Breaking Changes +- [24686]https://github.com/apache/superset/pull/24686): All dataset's custom explore_url are handled as relative URLs on the frontend, behaviour controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET. - [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy - [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator. - [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...` From dfe09386da95481d4c819b38d92054ad73250172 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 14:01:07 +0100 Subject: [PATCH 3/8] lint and remove unused tests --- superset/datasets/commands/update.py | 4 ---- superset/utils/urls.py | 4 +--- superset/views/datasource/views.py | 2 +- tests/integration_tests/datasource_tests.py | 26 --------------------- 4 files changed, 2 insertions(+), 34 deletions(-) diff --git a/superset/datasets/commands/update.py b/superset/datasets/commands/update.py index 3d716452f486a..a38439fb7f235 100644 --- a/superset/datasets/commands/update.py +++ b/superset/datasets/commands/update.py @@ -18,7 +18,6 @@ from collections import Counter from typing import Any, Optional -from flask import current_app from flask_appbuilder.models.sqla import Model from marshmallow import ValidationError @@ -102,9 +101,6 @@ def validate(self) -> None: self._properties["owners"] = owners except ValidationError as ex: exceptions.append(ex) - # Validate default URL safety - default_endpoint = self._properties.get("default_endpoint") - # Validate columns if columns := self._properties.get("columns"): self._validate_columns(columns, exceptions) diff --git a/superset/utils/urls.py b/superset/utils/urls.py index dc49bc4b7b19c..57a1b63dd41d3 100644 --- a/superset/utils/urls.py +++ b/superset/utils/urls.py @@ -14,12 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import unicodedata import urllib from typing import Any -from urllib.parse import urlparse -from flask import current_app, request, url_for +from flask import current_app, url_for def get_url_host(user_friendly: bool = False) -> str: diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py index d3512aab74e39..5b910644ac39c 100644 --- a/superset/views/datasource/views.py +++ b/superset/views/datasource/views.py @@ -18,7 +18,7 @@ from collections import Counter from typing import Any -from flask import current_app, redirect, request +from flask import redirect, request from flask_appbuilder import expose, permission_name from flask_appbuilder.api import rison from flask_appbuilder.security.decorators import has_access, has_access_api diff --git a/tests/integration_tests/datasource_tests.py b/tests/integration_tests/datasource_tests.py index 5de1cf6ef85e1..37a136a15c413 100644 --- a/tests/integration_tests/datasource_tests.py +++ b/tests/integration_tests/datasource_tests.py @@ -297,32 +297,6 @@ def test_save(self): print(k) self.assertEqual(resp[k], datasource_post[k]) - def test_save_default_endpoint_validation_fail(self): - self.login(username="admin") - tbl_id = self.get_table(name="birth_names").id - - datasource_post = get_datasource_post() - datasource_post["id"] = tbl_id - datasource_post["owners"] = [1] - datasource_post["default_endpoint"] = "http://www.google.com" - data = dict(data=json.dumps(datasource_post)) - resp = self.client.post("/datasource/save/", data=data) - assert resp.status_code == 400 - - def test_save_default_endpoint_validation_unsafe(self): - self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False - self.login(username="admin") - tbl_id = self.get_table(name="birth_names").id - - datasource_post = get_datasource_post() - datasource_post["id"] = tbl_id - datasource_post["owners"] = [1] - datasource_post["default_endpoint"] = "http://www.google.com" - data = dict(data=json.dumps(datasource_post)) - resp = self.client.post("/datasource/save/", data=data) - assert resp.status_code == 200 - self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True - def test_save_default_endpoint_validation_success(self): self.login(username="admin") tbl_id = self.get_table(name="birth_names").id From f4775f530fb7a2aa2b50f1886490861a9dff955d Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 14:10:12 +0100 Subject: [PATCH 4/8] fe lint --- superset-frontend/src/pages/DatasetList/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index 42d75f3ecc7d5..d980e5170a17e 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -29,7 +29,7 @@ import React, { useMemo, useCallback, } from 'react'; -import {Link, useHistory} from 'react-router-dom'; +import { Link, useHistory } from 'react-router-dom'; import rison from 'rison'; import { createFetchRelated, From f6460586dd339a4d9148eb644bce9f4f475902c5 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 14:22:03 +0100 Subject: [PATCH 5/8] improve code --- superset-frontend/src/pages/DatasetList/index.tsx | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index d980e5170a17e..ed1ec29aa91b9 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -124,10 +124,6 @@ type Dataset = { table_name: string; }; -interface DatasetListConfig { - PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET: boolean; -} - interface VirtualDataset extends Dataset { extra: Record; sql: string; @@ -186,10 +182,13 @@ const DatasetList: FunctionComponent = ({ setSSHTunnelPrivateKeyPasswordFields, ] = useState([]); - const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector< + const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET: boolean = useSelector< any, - DatasetListConfig - >(state => state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET); + boolean + >( + state => + state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false, + ); const openDatasetImportModal = () => { showImportModal(true); From f4e016768a3836b5dc04f3bf769188a164978089 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 13 Jul 2023 16:02:52 +0100 Subject: [PATCH 6/8] improve code --- .../src/pages/DatasetList/index.tsx | 2 +- tests/integration_tests/datasets/api_tests.py | 26 ------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index ed1ec29aa91b9..6fecccf64a194 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -182,7 +182,7 @@ const DatasetList: FunctionComponent = ({ setSSHTunnelPrivateKeyPasswordFields, ] = useState([]); - const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET: boolean = useSelector< + const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector< any, boolean >( diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py index 027002507aece..e85ca369b9b8d 100644 --- a/tests/integration_tests/datasets/api_tests.py +++ b/tests/integration_tests/datasets/api_tests.py @@ -1416,32 +1416,6 @@ def test_update_dataset_item_uniqueness(self): db.session.delete(ab_user) db.session.commit() - def test_update_dataset_unsafe_default_endpoint(self): - """ - Dataset API: Test unsafe default endpoint - """ - if backend() == "sqlite": - return - - dataset = self.insert_default_dataset() - self.login(username="admin") - uri = f"api/v1/dataset/{dataset.id}" - table_data = {"default_endpoint": "http://www.google.com"} - rv = self.client.put(uri, json=table_data) - data = json.loads(rv.data.decode("utf-8")) - assert rv.status_code == 422 - expected_response = { - "message": { - "default_endpoint": [ - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ] - } - } - assert data == expected_response - db.session.delete(dataset) - db.session.commit() - @patch("superset.daos.dataset.DatasetDAO.update") def test_update_dataset_sqlalchemy_error(self, mock_dao_update): """ From b6236f1123f3dde2c2453c14ec2f015c122821b0 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Fri, 14 Jul 2023 13:27:38 +0100 Subject: [PATCH 7/8] fe lint --- superset-frontend/src/pages/DatasetList/index.tsx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index 6fecccf64a194..5727bd9f00234 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -182,10 +182,7 @@ const DatasetList: FunctionComponent = ({ setSSHTunnelPrivateKeyPasswordFields, ] = useState([]); - const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector< - any, - boolean - >( + const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector( state => state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false, ); From 0107d5bea376c1d12944240bd45d0c67fec2c5ef Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 14 Aug 2023 13:56:34 +0100 Subject: [PATCH 8/8] add FE tests --- .../pages/DatasetList/DatasetList.test.tsx | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx index 358a5fcfcca34..117386f2a8182 100644 --- a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx +++ b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx @@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox'; import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint'; import { act } from 'react-dom/test-utils'; import SubMenu from 'src/features/home/SubMenu'; +import * as reactRedux from 'react-redux'; // store needed for withToasts(DatasetList) const mockStore = configureStore([thunk]); @@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*'; const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*'; const datasetsEndpoint = 'glob:*/api/v1/dataset/?*'; +const useSelectorMock = jest.spyOn(reactRedux, 'useSelector'); + const mockdatasets = [...new Array(3)].map((_, i) => ({ changed_by_name: 'user', kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual changed_by: 'user', changed_on: new Date().toISOString(), database_name: `db ${i}`, - explore_url: `/explore/?datasource_type=table&datasource_id=${i}`, + explore_url: `https://www.google.com?${i}`, id: i, schema: `schema ${i}`, table_name: `coolest table ${i}`, @@ -280,3 +283,58 @@ describe('RTL', () => { expect(importTooltip).toBeInTheDocument(); }); }); + +describe('Prevent unsafe URLs', () => { + const mockedProps = {}; + let wrapper: any; + + it('Check prevent unsafe is on renders relative links', async () => { + const tdColumnsNumber = 9; + useSelectorMock.mockReturnValue(true); + wrapper = await mountAndWait(mockedProps); + const tdElements = wrapper.find(ListView).find('td'); + expect( + tdElements + .at(0 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?0'); + expect( + tdElements + .at(1 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?1'); + expect( + tdElements + .at(2 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?2'); + }); + + it('Check prevent unsafe is off renders absolute links', async () => { + const tdColumnsNumber = 9; + useSelectorMock.mockReturnValue(false); + wrapper = await mountAndWait(mockedProps); + const tdElements = wrapper.find(ListView).find('td'); + expect( + tdElements + .at(0 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?0'); + expect( + tdElements + .at(1 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?1'); + expect( + tdElements + .at(2 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?2'); + }); +});