Skip to content

Commit

Permalink
SNOW-869536 Fix buggy behavior in DataFrame.to_local_iterator (#1226)
Browse files Browse the repository at this point in the history
* Add new change

* Fix test_async_to_pandas_batches

* Fix test_async_to_pandas_batches

* Add test for to_pandas_batches

* Remove unnecessary changes for to_pandas_batches

* Fix typo

* Fix typ 🤦

* Address comments
  • Loading branch information
sfc-gh-stan authored Feb 1, 2024
1 parent d1120ad commit 234b026
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
### Behavior Changes (API Compatible)

- Added support for an optional `date_part` argument in function `last_day`

### Bug Fixes

- Fixed a bug in `DataFrame.to_local_iterator` where, because the result cursor was not properly isolated, the iterator could yield wrong results if another query was executed before the iterator finished. For details, please see #945.

## 1.12.0 (2024-01-30)

Expand Down
10 changes: 9 additions & 1 deletion src/snowflake/snowpark/_internal/server_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,16 @@ def _to_data_or_iter(
results_cursor: SnowflakeCursor,
to_pandas: bool = False,
to_iter: bool = False,
num_statements: Optional[int] = None,
) -> Dict[str, Any]:
if (
to_iter and not to_pandas
): # Fix for SNOW-869536, to_pandas doesn't have this issue, SnowflakeCursor.fetch_pandas_batches already handles the isolation.
new_cursor = results_cursor.connection.cursor()
new_cursor.execute(
f"SELECT * FROM TABLE(RESULT_SCAN('{results_cursor.sfqid}'))"
)
results_cursor = new_cursor

if to_pandas:
try:
data_or_iter = (
Expand Down
17 changes: 17 additions & 0 deletions tests/integ/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3673,3 +3673,20 @@ def test_dataframe_interval_operation(session):
),
],
)


def test_dataframe_to_local_iterator_isolation(session):
    """Check that a local iterator keeps yielding its own rows when another
    query runs on the same session while the iteration is still in progress.
    """
    expected_rows = 10
    rows = [[1, 2, 3] for _ in range(expected_rows)]
    df = session.create_dataframe(rows, schema=["a", "b", "c"])

    seen_rows = 0
    row_iter = df.to_local_iterator()
    for _row in row_iter:
        # Touching the schema executes a schema query internally, interleaving
        # another query with the ongoing iteration.
        len(df.schema.fields)
        seen_rows += 1

    # The iterator must walk the results of the DataFrame's own query, not the
    # results of the schema query (which has a single row).
    assert (
        seen_rows == expected_rows
    ), f"Expect {expected_rows} rows, Got {seen_rows} instead"

0 comments on commit 234b026

Please sign in to comment.