Skip to content

Commit

Permalink
refactor: external metadata fetch API (apache#16193)
Browse files Browse the repository at this point in the history
* refactor: external metadata api

* fix comments

* fix ut

* fix fe lint

* fix UT

* fix UT
  • Loading branch information
zhaoyongjie authored and Emmanuel Bavoux committed Nov 14, 2021
1 parent 503e6ac commit 856faa5
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 49 deletions.
19 changes: 14 additions & 5 deletions superset-frontend/src/datasource/DatasourceEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
import rison from 'rison';
import React from 'react';
import PropTypes from 'prop-types';
import { Row, Col } from 'src/common/components';
Expand Down Expand Up @@ -485,11 +486,19 @@ class DatasourceEditor extends React.PureComponent {

syncMetadata() {
const { datasource } = this.state;
const endpoint = `/datasource/external_metadata_by_name/${
datasource.type || datasource.datasource_type
}/${datasource.database.database_name || datasource.database.name}/${
datasource.schema
}/${datasource.table_name}/`;
const params = {
datasource_type: datasource.type || datasource.datasource_type,
database_name:
datasource.database.database_name || datasource.database.name,
schema_name: datasource.schema,
table_name: datasource.table_name,
};
const endpoint = `/datasource/external_metadata_by_name/?q=${rison.encode(
// rison can't encode the undefined value
Object.keys(params).map(key =>
params[key] === undefined ? null : params[key],
),
)}`;
this.setState({ metadataLoading: true });

SupersetClient.get({ endpoint })
Expand Down
5 changes: 5 additions & 0 deletions superset/connectors/sqla/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import Dict, List, Optional, TYPE_CHECKING

from flask_babel import lazy_gettext as _
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.sql.type_api import TypeEngine

from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
Expand All @@ -41,6 +42,10 @@ def get_physical_table_metadata(
db_dialect = database.get_dialect()
# ensure empty schema
_schema_name = schema_name if schema_name else None
# Table does not exist or is not visible to a connection.
if not database.has_table_by_name(table_name, schema=_schema_name):
raise NoSuchTableError

cols = database.get_columns(table_name, schema=_schema_name)
for col in cols:
try:
Expand Down
2 changes: 1 addition & 1 deletion superset/initialization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def init_views(self) -> None:
DatabaseView,
ExcelToDatabaseView,
)
from superset.views.datasource import Datasource
from superset.views.datasource.views import Datasource
from superset.views.dynamic_plugins import DynamicPluginsView
from superset.views.key_value import KV
from superset.views.log.api import LogRestApi
Expand Down
16 changes: 16 additions & 0 deletions superset/views/datasource/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
54 changes: 54 additions & 0 deletions superset/views/datasource/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any

from marshmallow import fields, post_load, Schema
from typing_extensions import TypedDict


class ExternalMetadataParams(TypedDict):
datasource_type: str
database_name: str
schema_name: str
table_name: str


get_external_metadata_schema = {
"datasource_type": "string",
"database_name": "string",
"schema_name": "string",
"table_name": "string",
}


class ExternalMetadataSchema(Schema):
datasource_type = fields.Str(required=True)
database_name = fields.Str(required=True)
schema_name = fields.Str(allow_none=True)
table_name = fields.Str(required=True)

# pylint: disable=no-self-use,unused-argument
@post_load
def normalize(
self, data: ExternalMetadataParams, **kwargs: Any,
) -> ExternalMetadataParams:
return ExternalMetadataParams(
datasource_type=data["datasource_type"],
database_name=data["database_name"],
schema_name=data.get("schema_name", ""),
table_name=data["table_name"],
)
68 changes: 41 additions & 27 deletions superset/views/datasource.py → superset/views/datasource/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,39 @@
# under the License.
import json
from collections import Counter
from typing import Any

from flask import request
from flask_appbuilder import expose
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access_api
from flask_babel import _
from marshmallow import ValidationError
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.orm.exc import NoResultFound

from superset import app, db, event_logger
from superset.connectors.connector_registry import ConnectorRegistry
from superset.connectors.sqla.utils import get_physical_table_metadata
from superset.datasets.commands.exceptions import DatasetForbiddenError
from superset.datasets.commands.exceptions import (
DatasetForbiddenError,
DatasetNotFoundError,
)
from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database
from superset.typing import FlaskResponse
from superset.views.base import check_ownership

from ..utils.core import parse_js_uri_path_item
from .base import api, BaseSupersetView, handle_api_exception, json_error_response
from superset.views.base import (
api,
BaseSupersetView,
check_ownership,
handle_api_exception,
json_error_response,
)
from superset.views.datasource.schemas import (
ExternalMetadataParams,
ExternalMetadataSchema,
get_external_metadata_schema,
)


class Datasource(BaseSupersetView):
Expand Down Expand Up @@ -122,45 +138,43 @@ def external_metadata(
return json_error_response(str(ex), status=400)
return self.json_response(external_metadata)

@expose(
"/external_metadata_by_name/<datasource_type>/<database_name>/"
"<schema_name>/<table_name>/"
)
@expose("/external_metadata_by_name/")
@has_access_api
@api
@handle_api_exception
def external_metadata_by_name(
self,
datasource_type: str,
database_name: str,
schema_name: str,
table_name: str,
) -> FlaskResponse:
@rison(get_external_metadata_schema)
def external_metadata_by_name(self, **kwargs: Any) -> FlaskResponse:
"""Gets table metadata from the source system and SQLAlchemy inspector"""
database_name = parse_js_uri_path_item(database_name) or ""
schema_name = parse_js_uri_path_item(schema_name, eval_undefined=True) or ""
table_name = parse_js_uri_path_item(table_name) or ""
try:
params: ExternalMetadataParams = (
ExternalMetadataSchema().load(kwargs.get("rison"))
)
except ValidationError as err:
return json_error_response(str(err), status=400)

datasource = ConnectorRegistry.get_datasource_by_name(
session=db.session,
datasource_type=datasource_type,
database_name=database_name,
schema=schema_name,
datasource_name=table_name,
datasource_type=params["datasource_type"],
database_name=params["database_name"],
schema=params["schema_name"],
datasource_name=params["table_name"],
)
try:
if datasource is not None:
# Get columns from Superset metadata
external_metadata = datasource.external_metadata()
else:
# Use the SQLAlchemy inspector to get columns
database = (
db.session.query(Database)
.filter_by(database_name=database_name)
.filter_by(database_name=params["database_name"])
.one()
)
external_metadata = get_physical_table_metadata(
database=database, table_name=table_name, schema_name=schema_name,
database=database,
table_name=params["table_name"],
schema_name=params["schema_name"],
)
except SupersetException as ex:
return json_error_response(str(ex), status=400)
except (NoResultFound, NoSuchTableError):
raise DatasetNotFoundError
return self.json_response(external_metadata)
73 changes: 57 additions & 16 deletions tests/integration_tests/datasource_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from contextlib import contextmanager
from unittest import mock

import prison
import pytest

from superset import app, ConnectorRegistry, db
Expand Down Expand Up @@ -90,11 +91,15 @@ def test_external_metadata_for_virtual_table(self):
def test_external_metadata_by_name_for_physical_table(self):
self.login(username="admin")
tbl = self.get_table(name="birth_names")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{tbl.database.database_name}/undefined/{tbl.table_name}/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(
Expand All @@ -112,33 +117,69 @@ def test_external_metadata_by_name_for_virtual_table(self):
session.add(table)
session.commit()

table = self.get_table(name="dummy_sql_table")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{table.database.database_name}/undefined/{table.table_name}/"
tbl = self.get_table(name="dummy_sql_table")
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
assert {o.get("name") for o in resp} == {"intcol", "strcol"}
session.delete(table)
session.delete(tbl)
session.commit()

def test_external_metadata_by_name_from_sqla_inspector(self):
self.login(username="admin")
example_database = get_example_database()
with create_test_table_context(example_database):
url = (
f"/datasource/external_metadata_by_name/table/"
f"{example_database.database_name}/undefined/test_table/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "test_table",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(col_names, {"first", "second"})

url = (
f"/datasource/external_metadata_by_name/table/" f"foobar/undefined/foobar/"
# No databases found
params = prison.dumps(
{"datasource_type": "table", "database_name": "foo", "table_name": "bar",}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)

# No table found
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "fooooooooobarrrrrr",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
resp = self.get_json_resp(url, raise_on_error=False)

# invalid query params
params = prison.dumps({"datasource_type": "table",})
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
self.assertIn("error", resp)

def test_external_metadata_for_virtual_table_template_params(self):
Expand Down

0 comments on commit 856faa5

Please sign in to comment.