Deprecate Row.as_dict() (#309)
This PR deprecates the `Row.as_dict()` method: the class is intended to
be interchangeable with Spark's `Row`, which has no `.as_dict()` method.
Keeping it leads to subtle bugs where code works with the
execution-based backends but fails when the Spark-based backend is used
instead. Clients should use `.asDict()` instead.

To signal the deprecation we use Python's warnings mechanism, including
the new `warnings.deprecated` decorator (PEP 702) on Python 3.13+, which
helps static code analysis tools flag deprecated use.
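
For context, a minimal standalone sketch of the version-gated deprecation pattern the diff below applies; the `Legacy`/`old_name`/`new_name` names are illustrative and not part of the library:

```python
import sys
import warnings


class Legacy:
    """Illustrative class; the real change applies this pattern to Row.as_dict()."""

    def new_name(self) -> str:
        return "value"

    # On Python 3.13+ the PEP 702 decorator lets mypy and IDEs flag deprecated
    # calls statically; older interpreters fall back to a runtime warning only.
    if sys.version_info >= (3, 13):

        @warnings.deprecated("old_name() is deprecated; use new_name() instead.")
        def old_name(self) -> str:
            return self.new_name()

    else:

        def old_name(self) -> str:
            warnings.warn(
                "old_name() is deprecated; use new_name() instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            return self.new_name()
```

Callers can verify the runtime behaviour with `pytest.deprecated_call()`, as the new integration test below does.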
asnare authored Nov 7, 2024
1 parent 94ed7f0 commit 9aace9e
Showing 3 changed files with 34 additions and 13 deletions.
35 changes: 24 additions & 11 deletions src/databricks/labs/lsql/core.py
@@ -3,9 +3,11 @@
import json
import logging
import random
import sys
import threading
import time
import types
import warnings
from collections.abc import Callable, Iterator
from datetime import timedelta
from typing import TYPE_CHECKING, Any
@@ -70,28 +72,39 @@ def __new__(cls, *args, **kwargs):
return tuple.__new__(cls, args)

@classmethod
def factory(cls, col_names: list[str]) -> type:
def factory(cls, col_names: list[str]) -> type["Row"]:
"""Create a new Row class with the given column names."""
return type("Row", (Row,), {"__columns__": col_names})

def as_dict(self) -> dict[str, Any]:
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
return dict(zip(self.__columns__, self, strict=True))
# If we can mark the method as deprecated via PEP 702 annotation, prefer this because it helps mypy and
# PyCharm/IntelliJ detect and flag deprecated use.
if sys.version_info >= (3, 13):

@warnings.deprecated("Using as_dict() on rows is deprecated; use asDict() instead.") # pylint: disable=no-member
def as_dict(self) -> dict[str, Any]:
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
return self.asDict()
else:

def as_dict(self) -> dict[str, Any]:
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
warnings.warn("Using as_dict() on rows is deprecated; use asDict() instead.", DeprecationWarning)
return self.asDict()

# PySpark's compatibility
def asDict(self, recursive: bool = False) -> dict[str, Any]:
_ = recursive
return self.as_dict()
return dict(zip(self.__columns__, self, strict=True))

def __eq__(self, other):
def __eq__(self, other) -> bool:
"""Check if the rows are equal."""
if not isinstance(other, Row):
return False
# compare rows as dictionaries, because the order
# of fields in constructor is not guaranteed
return self.as_dict() == other.as_dict()
return self.asDict() == other.asDict()

def __contains__(self, item):
def __contains__(self, item) -> bool:
"""Check if the column is in the row."""
return item in self.__columns__

@@ -114,7 +127,7 @@ def __getattr__(self, col):
except ValueError:
raise AttributeError(col) from None

def __repr__(self):
def __repr__(self) -> str:
"""Get the string representation of the row."""
return f"Row({', '.join(f'{k}={v!r}' for (k, v) in zip(self.__columns__, self, strict=True))})"

@@ -311,7 +324,7 @@ def fetch_all(
>>> pickup_time, dropoff_time = row[0], row[1]
>>> pickup_zip = row.pickup_zip
>>> dropoff_zip = row["dropoff_zip"]
>>> all_fields = row.as_dict()
>>> all_fields = row.asDict()
>>> logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
:param statement: str
@@ -366,7 +379,7 @@ def fetch_one(self, statement: str, disable_magic: bool = False, **kwargs) -> Ro
>>> pickup_time, dropoff_time = row[0], row[1]
>>> pickup_zip = row.pickup_zip
>>> dropoff_zip = row['dropoff_zip']
>>> all_fields = row.as_dict()
>>> all_fields = row.asDict()
>>> print(f'{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}')
:param statement: str
10 changes: 9 additions & 1 deletion tests/integration/test_core.py
@@ -45,7 +45,7 @@ def test_sql_execution_partial(ws, env_or_skip):
pickup_time, dropoff_time = row[0], row[1]
pickup_zip = row.pickup_zip
dropoff_zip = row["dropoff_zip"]
all_fields = row.as_dict()
all_fields = row.asDict()
logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
results.append((pickup_zip, dropoff_zip))
assert results == [
@@ -83,3 +83,11 @@ def test_fetch_value(ws):
see = StatementExecutionExt(ws)
count = see.fetch_value("SELECT COUNT(*) FROM samples.nyctaxi.trips")
assert count == 21932


def test_row_as_dict_deprecated(ws) -> None:
see = StatementExecutionExt(ws)
row = see.fetch_one("SELECT 1")
assert row is not None
with pytest.deprecated_call():
_ = row.as_dict()
2 changes: 1 addition & 1 deletion tests/unit/test_core.py
@@ -38,7 +38,7 @@ def test_row_from_kwargs(row):
assert "foo" in row
assert len(row) == 2
assert list(row) == ["bar", True]
assert row.as_dict() == {"foo": "bar", "enabled": True}
assert row.asDict() == {"foo": "bar", "enabled": True}
foo, enabled = row
assert foo == "bar"
assert enabled is True