Skip to content

Commit

Permalink
Lower limit size
Browse files (browse the repository at this point in the history)
  • Loading branch information
mckornfield committed Sep 6, 2023
1 parent 8dd2770 commit 299a29e
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/gretel_trainer/relational/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def handle_partition(df: pd.DataFrame, lock: Lock):
table_session = self._get_table_session(pk_values.table_name)
nonlocal row_count

- chunk_size = 15_000 # limit how many checks go into a WHERE clause
+ chunk_size = 150 # limit how many checks go into a WHERE clause

for _, chunk_df in df.groupby(np.arange(len(df)) // chunk_size):
values_list = chunk_df.to_records(index=False).tolist()
Expand Down Expand Up @@ -542,6 +542,7 @@ def _sample_table(
if self._config.entire_table:
logger.debug(f"Extracting entire table: {table_name}")
with engine.connect() as conn:
+ # TODO: Add a loading percentage here?
df_iter = pd.read_sql_table(
table_name, conn, chunksize=self._chunk_size
)
Expand Down

0 comments on commit 299a29e

Please sign in to comment.