Skip to content

Commit

Permalink
fix(sqllab/charts): casting from timestamp[us] to timestamp[ns] would…
Browse files Browse the repository at this point in the history
… result in out of bounds timestamp (#18873)

* fix casting from timestamp[us] to timestamp[ns] would result in out of bounds timestamp from sqllab and charts

* Add unittests

* Lint changes and parameter variable rename

* Fix linting

(cherry picked from commit 8b72354)
  • Loading branch information
yeachan153 authored and michael-s-molina committed May 26, 2022
1 parent 9ca53b8 commit 8becd3e
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 3 deletions.
5 changes: 4 additions & 1 deletion superset/result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,10 @@ def convert_pa_dtype(pa_dtype: pa.DataType) -> Optional[str]:

@staticmethod
def convert_table_to_df(table: pa.Table) -> pd.DataFrame:
return table.to_pandas(integer_object_nulls=True)
try:
return table.to_pandas(integer_object_nulls=True)
except pa.lib.ArrowInvalid:
return table.to_pandas(integer_object_nulls=True, timestamp_as_object=True)

@staticmethod
def first_nonempty(items: List[Any]) -> Any:
Expand Down
4 changes: 2 additions & 2 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1762,14 +1762,14 @@ def normalize_dttm_col(
# Column is formatted as a numeric value
unit = timestamp_format.replace("epoch_", "")
df[DTTM_ALIAS] = pd.to_datetime(
dttm_col, utc=False, unit=unit, origin="unix"
dttm_col, utc=False, unit=unit, origin="unix", errors="coerce"
)
else:
# Column has already been formatted as a timestamp.
df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
else:
df[DTTM_ALIAS] = pd.to_datetime(
df[DTTM_ALIAS], utc=False, format=timestamp_format
df[DTTM_ALIAS], utc=False, format=timestamp_format, errors="coerce"
)
if offset:
df[DTTM_ALIAS] += timedelta(hours=offset)
Expand Down
5 changes: 5 additions & 0 deletions tests/integration_tests/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,3 +1201,8 @@ def normalize_col(
# test numeric epoch_ms format
df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts

# test that out of bounds timestamps are coerced to None instead of
# erroring out
df = pd.DataFrame([{"__timestamp": "1677-09-21 00:00:00", "a": 1}])
assert pd.isnull(normalize_col(df, None, 0, None)[DTTM_ALIAS][0])
52 changes: 52 additions & 0 deletions tests/unit_tests/dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel
from datetime import datetime

import pytest
from pandas import Timestamp

from superset.dataframe import df_to_records
from superset.superset_typing import DbapiDescription

Expand Down Expand Up @@ -53,3 +58,50 @@ def test_js_max_int(app_context: None) -> None:
{"a": 1, "b": "1239162456494753670", "c": "c1"},
{"a": 2, "b": 100, "c": "c2"},
]


@pytest.mark.parametrize(
"input_, expected",
[
pytest.param(
[
(datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"), 1),
(datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"), 2),
],
[
{
"a": datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"),
"b": 1,
},
{
"a": datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"),
"b": 2,
},
],
id="timestamp conversion fail",
),
pytest.param(
[
(datetime.strptime("1677-09-22 00:12:44", "%Y-%m-%d %H:%M:%S"), 1),
(datetime.strptime("2262-04-11 23:47:16", "%Y-%m-%d %H:%M:%S"), 2),
],
[
{"a": Timestamp("1677-09-22 00:12:44"), "b": 1},
{"a": Timestamp("2262-04-11 23:47:16"), "b": 2},
],
id="timestamp conversion success",
),
],
)
def test_max_pandas_timestamp(input_, expected) -> None:
from superset.db_engine_specs import BaseEngineSpec
from superset.result_set import SupersetResultSet

cursor_descr: DbapiDescription = [
("a", "datetime", None, None, None, None, False),
("b", "int", None, None, None, None, False),
]
results = SupersetResultSet(input_, cursor_descr, BaseEngineSpec)
df = results.to_pandas_df()

assert df_to_records(df) == expected

0 comments on commit 8becd3e

Please sign in to comment.