Skip to content

Commit

Permalink
fix(hive): Update _latest_partition_from_df in HiveEngineSpec to work on tables with multiple indexes (#14302)
Browse files Browse the repository at this point in the history

* Fix _latest_partition_from_df in HiveEngineSpec

* Add test HiveEngineSpec._latest_partition_from_df

* Fix formatting to pass black

Co-authored-by: Ville Brofeldt <[email protected]>
  • Loading branch information
codenamelxl and villebro authored Nov 23, 2021
1 parent 5d1c907 commit bc855f4
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
7 changes: 5 additions & 2 deletions superset/db_engine_specs/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,9 +429,12 @@ def latest_sub_partition(

@classmethod
def _latest_partition_from_df(cls, df: pd.DataFrame) -> Optional[List[str]]:
    """Return the values of the latest partition listed in ``df``.

    Hive partitions look like ``ds={partition name}`` and, for tables
    partitioned on several columns, ``ds={partition name}/hour={partition name}``.

    The latest partition is the greatest string in the first column of
    ``df``; strings are compared as text, not parsed as dates.

    :param df: frame whose first column holds one partition spec per row
    :return: one value per partition column of the latest partition, in
        the order they appear in the spec, or ``None`` when ``df`` is empty
    """
    if df.empty:
        return None

    latest_spec = df.iloc[:, 0].max()
    # Each "/"-separated segment is "<column>=<value>". Split on the first
    # "=" only so a value is kept whole even if it contains "=" itself.
    return [segment.split("=", 1)[1] for segment in latest_spec.split("/")]

@classmethod
Expand Down
21 changes: 21 additions & 0 deletions tests/integration_tests/db_engine_specs/hive_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# isort:skip_file
from datetime import datetime
from unittest import mock
from typing import List

import pytest
import pandas as pd
Expand Down Expand Up @@ -379,3 +380,23 @@ def test_where_latest_partition_no_columns_no_values(mock_method):
"test_table", "test_schema", db, select()
)
assert result is None


def test__latest_partition_from_df():
    """Check the values extracted from the latest Hive partition spec.

    Each case pairs the partition specs placed in the frame's single
    column with the list of values expected back.
    """
    cases = [
        # Single partition column, single row.
        (["ds=01-01-19"], ["01-01-19"]),
        # Single partition column, latest row is not the last one listed.
        (["ds=01-01-19", "ds=01-03-19", "ds=01-02-19"], ["01-03-19"]),
        # Two partition columns, single row.
        (["ds=01-01-19/hour=1"], ["01-01-19", "1"]),
        # Two partition columns, same hour on every row.
        (
            ["ds=01-01-19/hour=1", "ds=01-03-19/hour=1", "ds=01-02-19/hour=1"],
            ["01-03-19", "1"],
        ),
        # Two partition columns, a larger hour on an earlier date.
        (
            ["ds=01-01-19/hour=1", "ds=01-03-19/hour=1", "ds=01-02-19/hour=2"],
            ["01-03-19", "1"],
        ),
    ]

    for partitions, expected in cases:
        frame = pd.DataFrame({"partition": partitions})
        assert HiveEngineSpec._latest_partition_from_df(frame) == expected

0 comments on commit bc855f4

Please sign in to comment.