diff --git a/src/pydiverse/transform/core/table_impl.py b/src/pydiverse/transform/core/table_impl.py
index eeee76c6..721c8b62 100644
--- a/src/pydiverse/transform/core/table_impl.py
+++ b/src/pydiverse/transform/core/table_impl.py
@@ -181,6 +181,9 @@ def alias(self, name=None) -> AbstractTableImpl:
     def collect(self):
         ...
 
+    def collect_scalar(self):
+        ...
+
     def build_query(self):
         ...
 
diff --git a/src/pydiverse/transform/core/verbs.py b/src/pydiverse/transform/core/verbs.py
index 4c138112..26ab723a 100644
--- a/src/pydiverse/transform/core/verbs.py
+++ b/src/pydiverse/transform/core/verbs.py
@@ -116,6 +116,12 @@ def collect(tbl: AbstractTableImpl):
     return tbl.collect()
 
 
+@builtin_verb()
+def collect_scalar(tbl: AbstractTableImpl):
+    validate_table_args(tbl)
+    return tbl.collect_scalar()
+
+
 @builtin_verb()
 def build_query(tbl: AbstractTableImpl):
     return tbl.build_query()
diff --git a/src/pydiverse/transform/eager/pandas_table.py b/src/pydiverse/transform/eager/pandas_table.py
index 45b0497a..17601e4b 100644
--- a/src/pydiverse/transform/eager/pandas_table.py
+++ b/src/pydiverse/transform/eager/pandas_table.py
@@ -44,8 +44,11 @@ class PandasTableImpl(EagerTableImpl):
     computed at some point. This allows for a more lazy style API.
     """
 
-    def __init__(self, name: str, df: pd.DataFrame):
-        self.df = fast_pd_convert_dtypes(df)
+    def __init__(self, name: str, df: pd.DataFrame, convert_dtypes_to_ensure_semantics=True):
+        if convert_dtypes_to_ensure_semantics:
+            self.df = fast_pd_convert_dtypes(df)
+        else:
+            self.df = df
         self.join_translator = self.JoinTranslator()
 
         columns = {
@@ -133,6 +136,21 @@ def collect(self) -> pd.DataFrame:
         result.attrs["name"] = self.name
         return result
 
+    def collect_scalar(self) -> int | float | str | bool:
+        """Collect the table and return its single value.
+
+        :raises ValueError: if the collected DataFrame is not exactly 1x1.
+        """
+        # TODO: the return annotation may need to cover more scalar types.
+        result = self.collect()
+        if result.shape != (1, 1):
+            raise ValueError(
+                "collect_scalar() requires exactly one row and one column, "
+                f"got shape {result.shape}"
+            )
+        # Reuse the collected frame instead of calling collect() a second time.
+        return result.iloc[0, 0]
+
     def mutate(self, **kwargs):
         uuid_kwargs = {self.named_cols.fwd[k]: (k, v) for k, v in kwargs.items()}
         self.df_name_mapping.update(