SNOW-1887892: improve process pool + thread pool logic, add retry logic #2924

Open · wants to merge 4 commits into base: dev/data-source

Changes from 3 commits
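For orientation before the diff: a minimal, self-contained sketch of the pattern this change adopts, using hypothetical names (with_retry, fetch_partition, upload_file) that are not the PR's actual API. Worker processes fetch query partitions, a thread pool uploads each result as soon as its fetch completes (via as_completed), and both steps are retried up to a fixed limit.

```python
# Illustrative sketch only; names are hypothetical, not the PR's actual API.
from concurrent.futures import (
    ALL_COMPLETED,
    ProcessPoolExecutor,
    ThreadPoolExecutor,
    as_completed,
    wait,
)

MAX_RETRY_TIME = 3


def with_retry(fn, *args):
    """Run fn(*args); return its result, or the last exception after MAX_RETRY_TIME failures."""
    error = None
    for _ in range(MAX_RETRY_TIME):
        try:
            return fn(*args)
        except Exception as e:
            error = e
    return error


def fetch_partition(i: int) -> str:
    # Stand-in for "execute partition query i and write its rows to a parquet file".
    return f"data_{i}.parquet"


def upload_file(path: str) -> None:
    # Stand-in for "PUT the file to a stage and COPY INTO the target table".
    pass


def run(num_partitions: int, max_workers: int = 4) -> None:
    with ProcessPoolExecutor(max_workers=max_workers) as procs, ThreadPoolExecutor(
        max_workers=max_workers
    ) as threads:
        fetches = [procs.submit(with_retry, fetch_partition, i) for i in range(num_partitions)]
        uploads = []
        for done in as_completed(fetches):  # start uploading as soon as any fetch finishes
            result = done.result()
            if isinstance(result, Exception):
                raise result
            uploads.append(threads.submit(with_retry, upload_file, result))
        for fut in wait(uploads, return_when=ALL_COMPLETED).done:
            if fut.result() is not None:  # retry wrapper returns the error instead of raising
                raise fut.result()


if __name__ == "__main__":
    run(8)
```

The sketch mirrors the diff's convention that a retried task returns the final exception rather than raising it, so the caller checks each result before proceeding.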
142 changes: 88 additions & 54 deletions src/snowflake/snowpark/dataframe_reader.py
@@ -10,6 +10,7 @@
    wait,
    ALL_COMPLETED,
    ThreadPoolExecutor,
+    as_completed,
)

from dateutil import parser
@@ -95,6 +96,8 @@
    "TIMESTAMPFORMAT": "TIMESTAMP_FORMAT",
}

+MAX_RETRY_TIME = 3
+

def _validate_stage_path(path: str) -> str:
    stripped_path = path.strip("\"'")
@@ -1072,42 +1075,39 @@ def dbapi(
        sql_create_temp_stage = f"create {get_temp_type_for_object(self._session._use_scoped_temp_objects, True)} stage if not exists {snowflake_stage_name}"
        self._session._run_query(sql_create_temp_stage, is_ddl_on_temp_object=True)

-        with ProcessPoolExecutor(max_workers=max_workers) as executor:
-            futures = [
-                executor.submit(
-                    task_fetch_from_data_source,
-                    create_connection,
-                    query,
-                    raw_schema,
-                    i,
-                    tmp_dir,
-                )
-                for i, query in enumerate(partitioned_queries)
-            ]
-
-            completed_futures = wait(futures, return_when=ALL_COMPLETED)
-            files = []
-            for f in completed_futures.done:
-                if isinstance(f.result(), Exception):
-                    raise f.result()
-                else:
-                    files.append(f.result())
-        with ThreadPoolExecutor(max_workers=max_workers) as thread_executor:
-            futures = [
-                thread_executor.submit(
-                    self.upload_and_copy_into_table,
-                    f,
-                    snowflake_stage_name,
-                    snowflake_table_name,
-                    "abort_statement",
-                )
-                for f in files
-            ]
-
-            completed_futures = wait(futures, return_when=ALL_COMPLETED)
-            for f in completed_futures.done:
-                if f.result() is not None:
-                    raise f.result()
+        with ProcessPoolExecutor(max_workers=max_workers) as process_executor:
+            with ThreadPoolExecutor(max_workers=max_workers) as thread_executor:
+                thread_pool_futures = []
+                process_pool_futures = [
+                    process_executor.submit(
+                        task_fetch_from_data_source_with_retry,
+                        create_connection,
+                        query,
+                        raw_schema,
+                        i,
+                        tmp_dir,
+                    )
+                    for i, query in enumerate(partitioned_queries)
+                ]
+                for future in as_completed(process_pool_futures):
[Review comment from a Contributor on the as_completed line above]
I'm curious how this as_completed works. Say I have 10 futures and the 7th job finishes first; would as_completed(process_pool_futures) return the 7th job first?
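For reference, a minimal standalone sketch (not part of this diff) confirming the stdlib behavior in question: concurrent.futures.as_completed yields futures in the order they finish, not the order they were submitted, so the job that finishes first is returned first.

```python
# Standalone illustration of as_completed ordering (illustrative only).
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def job(i: int, delay: float) -> int:
    time.sleep(delay)  # simulate work of different durations
    return i

with ThreadPoolExecutor(max_workers=3) as executor:
    # Submitted in order 0, 1, 2, but job 2 finishes first because it sleeps the least.
    futures = [
        executor.submit(job, 0, 0.3),
        executor.submit(job, 1, 0.2),
        executor.submit(job, 2, 0.1),
    ]
    print([f.result() for f in as_completed(futures)])  # prints [2, 1, 0]
```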
+                    if isinstance(future.result(), Exception):
+                        raise future.result()
+                    else:
+                        thread_pool_futures.append(
+                            thread_executor.submit(
+                                self.upload_and_copy_into_table_with_rename_with_retry,
+                                future.result(),
+                                snowflake_stage_name,
+                                snowflake_table_name,
+                                "abort_statement",
+                            )
+                        )
+                completed_futures = wait(
+                    thread_pool_futures, return_when=ALL_COMPLETED
+                )
+                for f in completed_futures.done:
+                    if f.result() is not None:
+                        raise f.result()
        return res_df

    def _infer_data_source_schema(
@@ -1205,13 +1205,13 @@ def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
            fields.append(field)
        return StructType(fields)

-    def upload_and_copy_into_table(
+    def _upload_and_copy_into_table_with_rename(
        self,
        local_file: str,
        snowflake_stage_name: str,
        snowflake_table_name: Optional[str] = None,
        on_error: Optional[str] = "abort_statement",
-    ) -> Optional[Exception]:
+    ):
        file_name = os.path.basename(local_file)
        put_query = f"put file://{local_file} @{snowflake_stage_name}/ OVERWRITE=TRUE"
        copy_into_table_query = f"""
@@ -1221,28 +1221,62 @@ def upload_and_copy_into_table(
            PURGE=TRUE
            ON_ERROR={on_error}
        """
-        try:
-            self._session.sql(put_query).collect()
-            self._session.sql(copy_into_table_query).collect()
-            return None
-        except Exception as e:
-            return e
+        self._session.sql(put_query).collect()
+        self._session.sql(copy_into_table_query).collect()
+
+    def upload_and_copy_into_table_with_rename_with_retry(
+        self,
+        local_file: str,
+        snowflake_stage_name: str,
+        snowflake_table_name: Optional[str] = None,
+        on_error: Optional[str] = "abort_statement",
+    ) -> Optional[Exception]:
+        retry_count = 0
+        error = None
+        while retry_count < MAX_RETRY_TIME:
+            try:
+                self._upload_and_copy_into_table_with_rename(
+                    local_file, snowflake_stage_name, snowflake_table_name, on_error
+                )
+                return
+            except Exception as e:
+                error = e
+                retry_count += 1
+        return error


-def task_fetch_from_data_source(
+def _task_fetch_from_data_source(
    create_connection: Callable[[], "Connection"],
    query: str,
    schema: tuple[tuple[str, Any, int, int, int, int, bool]],
    i: int,
    tmp_dir: str,
-) -> Union[str, Exception]:
-    try:
-        conn = create_connection()
-        result = conn.cursor().execute(query).fetchall()
-        columns = [col[0] for col in schema]
-        df = pd.DataFrame.from_records(result, columns=columns)
-        path = os.path.join(tmp_dir, f"data_{i}.parquet")
-        df.to_parquet(path)
-    except Exception as e:
-        return e
+) -> str:
+    conn = create_connection()
+    result = conn.cursor().execute(query).fetchall()
+    columns = [col[0] for col in schema]
+    df = pd.DataFrame.from_records(result, columns=columns)
+    path = os.path.join(tmp_dir, f"data_{i}.parquet")
+    df.to_parquet(path)
    return path
+
+
+def task_fetch_from_data_source_with_retry(
+    create_connection: Callable[[], "Connection"],
+    query: str,
+    schema: tuple[tuple[str, Any, int, int, int, int, bool]],
+    i: int,
+    tmp_dir: str,
+) -> Union[str, Exception]:
+    retry_count = 0
+    error = None
+    while retry_count < MAX_RETRY_TIME:
+        try:
+            path = _task_fetch_from_data_source(
+                create_connection, query, schema, i, tmp_dir
+            )
+            return path
+        except Exception as e:
+            error = e
+            retry_count += 1
+    return error
39 changes: 39 additions & 0 deletions tests/integ/test_data_source_api.py
@@ -5,9 +5,15 @@
import decimal
from _decimal import Decimal
import datetime
+from unittest import mock
+from unittest.mock import MagicMock
import pytest

+from snowflake.snowpark.dataframe_reader import (
+    task_fetch_from_data_source_with_retry,
+    MAX_RETRY_TIME,
+)

SQL_SERVER_TABLE_NAME = "RandomDataWith100Columns"


@@ -694,3 +700,36 @@ def create_connection():
def test_dbapi_with_temp_table(session):
    df = session.read.dbapi(create_connection, SQL_SERVER_TABLE_NAME, max_workers=4)
    assert df.collect() == rows
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="feature not available in local testing",
+)
+def test_dbapi_retry(session):
+
+    with mock.patch(
+        "snowflake.snowpark.dataframe_reader._task_fetch_from_data_source",
+        side_effect=Exception("Test error"),
+    ) as mock_task:
+        result = task_fetch_from_data_source_with_retry(
+            create_connection=create_connection,
+            query="SELECT * FROM test_table",
+            schema=(("col1", int, 0, 0, 0, 0, False),),
+            i=0,
+            tmp_dir="/tmp",
+        )
+        assert mock_task.call_count == MAX_RETRY_TIME
+        assert isinstance(result, Exception)
+
+    with mock.patch(
+        "snowflake.snowpark.dataframe_reader.DataFrameReader._upload_and_copy_into_table_with_rename",
+        side_effect=Exception("Test error"),
+    ) as mock_task:
+        result = session.read.upload_and_copy_into_table_with_rename_with_retry(
+            local_file="fake_file",
+            snowflake_stage_name="fake_stage",
+            snowflake_table_name="fake_table",
+        )
+        assert mock_task.call_count == MAX_RETRY_TIME
+        assert isinstance(result, Exception)