Skip to content

Commit

Permalink
Update predict_parquet.py
Browse files Browse the repository at this point in the history
  • Loading branch information
EliHei2 authored Dec 9, 2024
1 parent f712172 commit fb58fc1
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/segger/prediction/predict_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,14 +640,14 @@ def _get_id():
  step_start_time = time()
  print(f"Saving transcirpts.parquet...")
  transcripts_save_path = save_dir / "segger_transcripts.parquet"
- transcripts_df_filtered = transcripts_df_filtered.repartition(npartitions=100)
+ # transcripts_df_filtered = transcripts_df_filtered.repartition(npartitions=100)
  transcripts_df_filtered.to_parquet(
  transcripts_save_path,
  engine="pyarrow", # PyArrow is faster and recommended
  compression="snappy", # Use snappy compression for speed
- write_index=False, # Skip writing index if not needed
- append=False, # Set to True if you're appending to an existing Parquet file
- overwrite=True,
+ # write_index=False, # Skip writing index if not needed
+ # append=False, # Set to True if you're appending to an existing Parquet file
+ # overwrite=True,
  ) # Dask handles Parquet well
  if verbose:
  elapsed_time = time() - step_start_time
Expand All @@ -658,7 +658,7 @@ def _get_id():
  step_start_time = time()
  print(f"Saving anndata object...")
  anndata_save_path = save_dir / "segger_adata.h5ad"
- segger_adata = create_anndata(transcripts_df_filtered.compute(), **anndata_kwargs) # Compute for AnnData
+ segger_adata = create_anndata(transcripts_df_filtered, **anndata_kwargs) # Compute for AnnData
  segger_adata.write(anndata_save_path)
  if verbose:
  elapsed_time = time() - step_start_time
Expand Down

0 comments on commit fb58fc1

Please sign in to comment.