Skip to content

Commit

Permalink
fix(sqla): make text clause escaping optional (#17641)
Browse files Browse the repository at this point in the history
  • Loading branch information
villebro authored Dec 3, 2021
1 parent 73e7928 commit b2ffa26
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 77 deletions.
37 changes: 18 additions & 19 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,6 @@
VIRTUAL_TABLE_ALIAS = "virtual_table"


def text(clause: str) -> TextClause:
"""
SQLALchemy wrapper to ensure text clauses are escaped properly
:param clause: clause potentially containing colons
:return: text clause with escaped colons
"""
return sa.text(clause.replace(":", "\\:"))


class SqlaQuery(NamedTuple):
applied_template_filters: List[str]
extra_cache_keys: List[Any]
Expand Down Expand Up @@ -307,18 +297,24 @@ def get_time_filter(
l = []
if start_dttm:
l.append(
col >= text(self.dttm_sql_literal(start_dttm, time_range_endpoints))
col
>= self.table.text(
self.dttm_sql_literal(start_dttm, time_range_endpoints)
)
)
if end_dttm:
if (
time_range_endpoints
and time_range_endpoints[1] == utils.TimeRangeEndpoint.EXCLUSIVE
):
l.append(
col < text(self.dttm_sql_literal(end_dttm, time_range_endpoints))
col
< self.table.text(
self.dttm_sql_literal(end_dttm, time_range_endpoints)
)
)
else:
l.append(col <= text(self.dttm_sql_literal(end_dttm, None)))
l.append(col <= self.table.text(self.dttm_sql_literal(end_dttm, None)))
return and_(*l)

def get_timestamp_expression(
Expand Down Expand Up @@ -733,7 +729,7 @@ def extra_dict(self) -> Dict[str, Any]:
def get_fetch_values_predicate(self) -> TextClause:
tp = self.get_template_processor()
try:
return text(tp.process_template(self.fetch_values_predicate))
return self.text(tp.process_template(self.fetch_values_predicate))
except TemplateError as ex:
raise QueryObjectValidationError(
_(
Expand Down Expand Up @@ -828,7 +824,7 @@ def get_from_clause(
raise QueryObjectValidationError(
_("Virtual dataset query must be read-only")
)
return TextAsFrom(text(from_sql), []).alias(VIRTUAL_TABLE_ALIAS)
return TextAsFrom(self.text(from_sql), []).alias(VIRTUAL_TABLE_ALIAS)

def get_rendered_sql(
self, template_processor: Optional[BaseTemplateProcessor] = None
Expand Down Expand Up @@ -970,7 +966,7 @@ def _get_sqla_row_level_filters(
filters_grouped: Dict[Union[int, str], List[str]] = defaultdict(list)
try:
for filter_ in security_manager.get_rls_filters(self):
clause = text(
clause = self.text(
f"({template_processor.process_template(filter_.clause)})"
)
filters_grouped[filter_.group_key or filter_.id].append(clause)
Expand All @@ -980,6 +976,9 @@ def _get_sqla_row_level_filters(
_("Error in jinja expression in RLS filters: %(msg)s", msg=ex.message,)
) from ex

def text(self, clause: str) -> TextClause:
return self.db_engine_spec.get_text_clause(clause)

def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
self,
apply_fetch_values_predicate: bool = False,
Expand Down Expand Up @@ -1340,7 +1339,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
msg=ex.message,
)
) from ex
where_clause_and += [text(f"({where})")]
where_clause_and += [self.text(f"({where})")]
having = extras.get("having")
if having:
try:
Expand All @@ -1352,7 +1351,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
msg=ex.message,
)
) from ex
having_clause_and += [text(f"({having})")]
having_clause_and += [self.text(f"({having})")]
if apply_fetch_values_predicate and self.fetch_values_predicate:
qry = qry.where(self.get_fetch_values_predicate())
if granularity:
Expand Down Expand Up @@ -1541,7 +1540,7 @@ def _normalize_prequery_result_type(
)

if sql:
value = text(sql)
value = self.text(sql)

return value

Expand Down
1 change: 1 addition & 0 deletions superset/db_engine_specs/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
class AthenaEngineSpec(BaseEngineSpec):
engine = "awsathena"
engine_name = "Amazon Athena"
allows_escaped_colons = False

_time_grain_expressions = {
None: "{col}",
Expand Down
15 changes: 14 additions & 1 deletion superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.orm import Session
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql.expression import ColumnClause, Select, TextAsFrom
from sqlalchemy.sql.expression import ColumnClause, Select, TextAsFrom, TextClause
from sqlalchemy.types import String, TypeEngine, UnicodeText
from typing_extensions import TypedDict

Expand Down Expand Up @@ -279,6 +279,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
allows_alias_in_select = True
allows_alias_in_orderby = True
allows_sql_comments = True
allows_escaped_colons = True

# Whether ORDER BY clause can use aliases created in SELECT
# that are the same as a source column
Expand Down Expand Up @@ -338,6 +339,18 @@ def get_allow_cost_estimate( # pylint: disable=unused-argument
) -> bool:
return False

@classmethod
def get_text_clause(cls, clause: str) -> TextClause:
"""
SQLALchemy wrapper to ensure text clauses are escaped properly
:param clause: string clause with potentially unescaped characters
:return: text clause with escaped characters
"""
if cls.allows_escaped_colons:
clause = clause.replace(":", "\\:")
return text(clause)

@classmethod
def get_engine(
cls,
Expand Down
57 changes: 0 additions & 57 deletions tests/integration_tests/db_engine_specs/athena_tests.py

This file was deleted.

87 changes: 87 additions & 0 deletions tests/unit_tests/db_engine_specs/test_athena.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
import re
from datetime import datetime

from flask.ctx import AppContext

from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from tests.unit_tests.fixtures.common import dttm

SYNTAX_ERROR_REGEX = re.compile(
": mismatched input '(?P<syntax_error>.*?)'. Expecting: "
)


def test_convert_dttm(app_context: AppContext, dttm: datetime) -> None:
"""
Test that date objects are converted correctly.
"""

from superset.db_engine_specs.athena import AthenaEngineSpec

assert (
AthenaEngineSpec.convert_dttm("DATE", dttm) == "from_iso8601_date('2019-01-02')"
)

assert (
AthenaEngineSpec.convert_dttm("TIMESTAMP", dttm)
== "from_iso8601_timestamp('2019-01-02T03:04:05.678900')"
)


def test_extract_errors(app_context: AppContext) -> None:
"""
Test that custom error messages are extracted correctly.
"""

from superset.db_engine_specs.athena import AthenaEngineSpec

msg = ": mismatched input 'fromm'. Expecting: "
result = AthenaEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='Please check your query for syntax errors at or near "fromm". Then, try running your query again.',
error_type=SupersetErrorType.SYNTAX_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Amazon Athena",
"issue_codes": [
{
"code": 1030,
"message": "Issue 1030 - The query has a syntax error.",
}
],
},
)
]


def test_get_text_clause_with_colon(app_context: AppContext) -> None:
"""
Make sure text clauses don't escape the colon character
"""

from superset.db_engine_specs.athena import AthenaEngineSpec

query = (
"SELECT foo FROM tbl WHERE "
"abc >= from_iso8601_timestamp('2021-11-26T00\:00\:00.000000')"
)
text_clause = AthenaEngineSpec.get_text_clause(query)
assert text_clause.text == query
33 changes: 33 additions & 0 deletions tests/unit_tests/db_engine_specs/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
import re

from flask.ctx import AppContext


def test_get_text_clause_with_colon(app_context: AppContext) -> None:
"""
Make sure text clauses are correctly escaped
"""

from superset.db_engine_specs.base import BaseEngineSpec

text_clause = BaseEngineSpec.get_text_clause(
"SELECT foo FROM tbl WHERE foo = '123:456')"
)
assert text_clause.text == "SELECT foo FROM tbl WHERE foo = '123\\:456')"
25 changes: 25 additions & 0 deletions tests/unit_tests/fixtures/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime

import pytest


@pytest.fixture
def dttm() -> datetime:
return datetime.strptime("2019-01-02 03:04:05.678900", "%Y-%m-%d %H:%M:%S.%f")

0 comments on commit b2ffa26

Please sign in to comment.