Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: external metadata fetch API #16193

Merged
merged 6 commits into from
Aug 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions superset-frontend/src/datasource/DatasourceEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
import rison from 'rison';
import React from 'react';
import PropTypes from 'prop-types';
import { Row, Col } from 'src/common/components';
Expand Down Expand Up @@ -485,11 +486,19 @@ class DatasourceEditor extends React.PureComponent {

syncMetadata() {
const { datasource } = this.state;
const endpoint = `/datasource/external_metadata_by_name/${
datasource.type || datasource.datasource_type
}/${datasource.database.database_name || datasource.database.name}/${
datasource.schema
}/${datasource.table_name}/`;
const params = {
datasource_type: datasource.type || datasource.datasource_type,
database_name:
datasource.database.database_name || datasource.database.name,
schema_name: datasource.schema,
table_name: datasource.table_name,
};
const endpoint = `/datasource/external_metadata_by_name/?q=${rison.encode(
// rison can't encode the undefined value
Object.keys(params).map(key =>
params[key] === undefined ? null : params[key],
),
Comment on lines +498 to +500
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to encode the key/value pairs (Object.entries), not just the values (Object.keys)? (honest question, I haven't done a lot of this rison API stuff)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is to code entire params field, just change undefined to null, because prison can't encode undefined value

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes now I see! 🤦

)}`;
this.setState({ metadataLoading: true });

SupersetClient.get({ endpoint })
Expand Down
5 changes: 5 additions & 0 deletions superset/connectors/sqla/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import Dict, List, Optional, TYPE_CHECKING

from flask_babel import lazy_gettext as _
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.sql.type_api import TypeEngine

from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
Expand All @@ -41,6 +42,10 @@ def get_physical_table_metadata(
db_dialect = database.get_dialect()
# ensure empty schema
_schema_name = schema_name if schema_name else None
# Table does not exist or is not visible to a connection.
if not database.has_table_by_name(table_name, schema=_schema_name):
raise NoSuchTableError

cols = database.get_columns(table_name, schema=_schema_name)
for col in cols:
try:
Expand Down
2 changes: 1 addition & 1 deletion superset/initialization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def init_views(self) -> None:
DatabaseView,
ExcelToDatabaseView,
)
from superset.views.datasource import Datasource
from superset.views.datasource.views import Datasource
from superset.views.dynamic_plugins import DynamicPluginsView
from superset.views.key_value import KV
from superset.views.log.api import LogRestApi
Expand Down
16 changes: 16 additions & 0 deletions superset/views/datasource/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
54 changes: 54 additions & 0 deletions superset/views/datasource/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any

from marshmallow import fields, post_load, Schema
from typing_extensions import TypedDict


class ExternalMetadataParams(TypedDict):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@villebro @zhaoyongjie I wonder if we should be using marshmallow_dataclass instead as it adhere's to the DRY principle and simplifies the code. I've used it for an internal project and I'm definitely a fan.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the mention. It's a more elegant way, let me try to use marshmallow_dataclass to refactor this class.

datasource_type: str
database_name: str
schema_name: str
table_name: str


get_external_metadata_schema = {
"datasource_type": "string",
"database_name": "string",
"schema_name": "string",
"table_name": "string",
}


class ExternalMetadataSchema(Schema):
datasource_type = fields.Str(required=True)
database_name = fields.Str(required=True)
schema_name = fields.Str(allow_none=True)
table_name = fields.Str(required=True)

# pylint: disable=no-self-use,unused-argument
@post_load
def normalize(
self, data: ExternalMetadataParams, **kwargs: Any,
) -> ExternalMetadataParams:
return ExternalMetadataParams(
datasource_type=data["datasource_type"],
database_name=data["database_name"],
schema_name=data.get("schema_name", ""),
table_name=data["table_name"],
)
68 changes: 41 additions & 27 deletions superset/views/datasource.py → superset/views/datasource/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,39 @@
# under the License.
import json
from collections import Counter
from typing import Any

from flask import request
from flask_appbuilder import expose
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access_api
from flask_babel import _
from marshmallow import ValidationError
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.orm.exc import NoResultFound

from superset import app, db, event_logger
from superset.connectors.connector_registry import ConnectorRegistry
from superset.connectors.sqla.utils import get_physical_table_metadata
from superset.datasets.commands.exceptions import DatasetForbiddenError
from superset.datasets.commands.exceptions import (
DatasetForbiddenError,
DatasetNotFoundError,
)
from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database
from superset.typing import FlaskResponse
from superset.views.base import check_ownership

from ..utils.core import parse_js_uri_path_item
from .base import api, BaseSupersetView, handle_api_exception, json_error_response
from superset.views.base import (
api,
BaseSupersetView,
check_ownership,
handle_api_exception,
json_error_response,
)
from superset.views.datasource.schemas import (
ExternalMetadataParams,
ExternalMetadataSchema,
get_external_metadata_schema,
)


class Datasource(BaseSupersetView):
Expand Down Expand Up @@ -122,45 +138,43 @@ def external_metadata(
return json_error_response(str(ex), status=400)
return self.json_response(external_metadata)

@expose(
"/external_metadata_by_name/<datasource_type>/<database_name>/"
"<schema_name>/<table_name>/"
)
@expose("/external_metadata_by_name/")
@has_access_api
@api
@handle_api_exception
def external_metadata_by_name(
self,
datasource_type: str,
database_name: str,
schema_name: str,
table_name: str,
) -> FlaskResponse:
@rison(get_external_metadata_schema)
def external_metadata_by_name(self, **kwargs: Any) -> FlaskResponse:
"""Gets table metadata from the source system and SQLAlchemy inspector"""
database_name = parse_js_uri_path_item(database_name) or ""
schema_name = parse_js_uri_path_item(schema_name, eval_undefined=True) or ""
table_name = parse_js_uri_path_item(table_name) or ""
try:
params: ExternalMetadataParams = (
ExternalMetadataSchema().load(kwargs.get("rison"))
)
except ValidationError as err:
return json_error_response(str(err), status=400)

datasource = ConnectorRegistry.get_datasource_by_name(
session=db.session,
datasource_type=datasource_type,
database_name=database_name,
schema=schema_name,
datasource_name=table_name,
datasource_type=params["datasource_type"],
database_name=params["database_name"],
schema=params["schema_name"],
datasource_name=params["table_name"],
)
try:
if datasource is not None:
# Get columns from Superset metadata
external_metadata = datasource.external_metadata()
else:
# Use the SQLAlchemy inspector to get columns
database = (
db.session.query(Database)
.filter_by(database_name=database_name)
.filter_by(database_name=params["database_name"])
.one()
)
external_metadata = get_physical_table_metadata(
database=database, table_name=table_name, schema_name=schema_name,
database=database,
table_name=params["table_name"],
schema_name=params["schema_name"],
)
except SupersetException as ex:
return json_error_response(str(ex), status=400)
except (NoResultFound, NoSuchTableError):
raise DatasetNotFoundError
return self.json_response(external_metadata)
73 changes: 57 additions & 16 deletions tests/integration_tests/datasource_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from contextlib import contextmanager
from unittest import mock

import prison
import pytest

from superset import app, ConnectorRegistry, db
Expand Down Expand Up @@ -90,11 +91,15 @@ def test_external_metadata_for_virtual_table(self):
def test_external_metadata_by_name_for_physical_table(self):
self.login(username="admin")
tbl = self.get_table(name="birth_names")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{tbl.database.database_name}/undefined/{tbl.table_name}/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(
Expand All @@ -112,33 +117,69 @@ def test_external_metadata_by_name_for_virtual_table(self):
session.add(table)
session.commit()

table = self.get_table(name="dummy_sql_table")
# empty schema need to be represented by undefined
url = (
f"/datasource/external_metadata_by_name/table/"
f"{table.database.database_name}/undefined/{table.table_name}/"
tbl = self.get_table(name="dummy_sql_table")
params = prison.dumps(
{
"datasource_type": "table",
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
assert {o.get("name") for o in resp} == {"intcol", "strcol"}
session.delete(table)
session.delete(tbl)
session.commit()

def test_external_metadata_by_name_from_sqla_inspector(self):
self.login(username="admin")
example_database = get_example_database()
with create_test_table_context(example_database):
url = (
f"/datasource/external_metadata_by_name/table/"
f"{example_database.database_name}/undefined/test_table/"
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "test_table",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp}
self.assertEqual(col_names, {"first", "second"})

url = (
f"/datasource/external_metadata_by_name/table/" f"foobar/undefined/foobar/"
# No databases found
params = prison.dumps(
{"datasource_type": "table", "database_name": "foo", "table_name": "bar",}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)

# No table found
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "fooooooooobarrrrrr",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
resp = self.get_json_resp(url, raise_on_error=False)

# invalid query params
params = prison.dumps({"datasource_type": "table",})
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
self.assertIn("error", resp)

def test_external_metadata_for_virtual_table_template_params(self):
Expand Down