Skip to content

Commit

Permalink
Merge branch 'EliHei2:main' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
daniel-unyi-42 authored Dec 9, 2024
2 parents b0f6eaf + fb58fc1 commit de7fe72
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/segger/prediction/predict_parquet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -647,14 +647,14 @@ def _get_id():
step_start_time = time()
print(f"Saving transcirpts.parquet...")
transcripts_save_path = save_dir / "segger_transcripts.parquet"
transcripts_df_filtered = transcripts_df_filtered.repartition(npartitions=100)
# transcripts_df_filtered = transcripts_df_filtered.repartition(npartitions=100)
transcripts_df_filtered.to_parquet(
transcripts_save_path,
engine="pyarrow", # PyArrow is faster and recommended
compression="snappy", # Use snappy compression for speed
write_index=False, # Skip writing index if not needed
append=False, # Set to True if you're appending to an existing Parquet file
overwrite=True,
# write_index=False, # Skip writing index if not needed
# append=False, # Set to True if you're appending to an existing Parquet file
# overwrite=True,
) # Dask handles Parquet well
if verbose:
elapsed_time = time() - step_start_time
Expand All @@ -665,7 +665,7 @@ def _get_id():
step_start_time = time()
print(f"Saving anndata object...")
anndata_save_path = save_dir / "segger_adata.h5ad"
segger_adata = create_anndata(transcripts_df_filtered.compute(), **anndata_kwargs) # Compute for AnnData
segger_adata = create_anndata(transcripts_df_filtered, **anndata_kwargs) # Compute for AnnData
segger_adata.write(anndata_save_path)
if verbose:
elapsed_time = time() - step_start_time
Expand Down

0 comments on commit de7fe72

Please sign in to comment.