Skip to content

Commit

Permalink
Catch mistake in structured dataset (#2834)
Browse files Browse the repository at this point in the history
Signed-off-by: Yee Hing Tong <[email protected]>
  • Loading branch information
wild-endeavor authored and kumare3 committed Nov 8, 2024
1 parent edb8a08 commit b176c1c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
7 changes: 7 additions & 0 deletions flytekit/types/structured/structured_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,13 @@ def to_literal(
# In case it's a FlyteSchema
sdt = StructuredDatasetType(format=self.DEFAULT_FORMATS.get(python_type, GENERIC_FORMAT))

if issubclass(python_type, StructuredDataset) and not isinstance(python_val, StructuredDataset):
# Catch a common mistake
raise TypeTransformerFailedError(
f"Expected a StructuredDataset instance, but got {type(python_val)} instead."
f" Did you forget to wrap your dataframe in a StructuredDataset instance?"
)

if expected and expected.structured_dataset_type:
sdt = StructuredDatasetType(
columns=expected.structured_dataset_type.columns,
Expand Down
13 changes: 13 additions & 0 deletions tests/flytekit/unit/core/test_type_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3692,3 +3692,16 @@ def test_structured_dataset_collection():
lv = TypeEngine.to_literal(FlyteContext.current_context(), [[StructuredDataset(df)]],
WineTypeListList, lt)
assert lv is not None


@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
def test_structured_dataset_mismatch():
    """Check that a bare DataFrame is rejected when the declared type is StructuredDataset.

    The conversion is exercised twice: once by calling the transformer returned by
    ``TypeEngine.get_transformer`` directly, and once through ``TypeEngine.to_literal``.
    Both calls are expected to raise ``TypeTransformerFailedError``.
    """
    import pandas as pd

    df = pd.DataFrame({"alcohol": [1.0, 2.0], "malic_acid": [2.0, 3.0]})

    # Build the shared arguments outside the ``pytest.raises`` blocks so that an
    # error raised while preparing them cannot be mistaken for the expected failure.
    ctx = FlyteContext.current_context()
    literal_type = TypeEngine.to_literal_type(StructuredDataset)
    transformer = TypeEngine.get_transformer(StructuredDataset)

    # Direct call on the transformer: the unwrapped dataframe must be refused.
    with pytest.raises(TypeTransformerFailedError):
        transformer.to_literal(ctx, df, StructuredDataset, literal_type)

    # Same mismatch through the TypeEngine entry point.
    with pytest.raises(TypeTransformerFailedError):
        TypeEngine.to_literal(ctx, df, StructuredDataset, literal_type)

0 comments on commit b176c1c

Please sign in to comment.