Skip to content

Commit

Permalink
bigwigs importing
Browse files Browse the repository at this point in the history
  • Loading branch information
LukasMahieu committed Jun 15, 2024
1 parent 1bd6ac3 commit b1420e0
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 20 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ dependencies = [
"scikit-learn",
"tqdm",
"loguru",
"logomaker"
"logomaker",
"pybigtools",
]

[project.optional-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions src/crested/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from importlib.metadata import version

from . import pl, pp, tl
from ._io import import_peaks, import_topics
from ._io import import_bigwigs, import_topics
from ._logging import setup_logging

__all__ = ["pl", "pp", "tl", "import_topics", "import_peaks", "setup_logging"]
__all__ = ["pl", "pp", "tl", "import_topics", "import_bigwigs", "setup_logging"]

__version__ = version("crested")

Expand Down
39 changes: 22 additions & 17 deletions src/crested/_io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""I/O functions for importing topics and bigWigs into AnnData objects."""

from __future__ import annotations

import os
Expand Down Expand Up @@ -76,7 +78,7 @@ def _extract_values_from_bigwig(bw_file, bed_file, target, target_region_width):


def _read_consensus_regions(
regions_file: PathLike, chromsizes_file: PathLike
regions_file: PathLike, chromsizes_file: PathLike | None = None
) -> pd.DataFrame:
"""Read consensus regions BED file and filter out regions not within chromosomes."""
consensus_peaks = pd.read_csv(
Expand All @@ -90,26 +92,29 @@ def _read_consensus_regions(
+ consensus_peaks[2].astype(str)
)

chromsizes_dict = _read_chromsizes(chromsizes_file)
valid_mask = consensus_peaks.apply(
lambda row: row[0] in chromsizes_dict
and row[1] >= 0
and row[2] <= chromsizes_dict[row[0]],
axis=1,
)
consensus_peaks_filtered = consensus_peaks[valid_mask]

if len(consensus_peaks) != len(consensus_peaks_filtered):
logger.warning(
f"Filtered {len(consensus_peaks) - len(consensus_peaks_filtered)} consensus regions (not within chromosomes)",
if chromsizes_file:
chromsizes_dict = _read_chromsizes(chromsizes_file)
valid_mask = consensus_peaks.apply(
lambda row: row[0] in chromsizes_dict
and row[1] >= 0
and row[2] <= chromsizes_dict[row[0]],
axis=1,
)
return consensus_peaks_filtered
consensus_peaks_filtered = consensus_peaks[valid_mask]

if len(consensus_peaks) != len(consensus_peaks_filtered):
logger.warning(
f"Filtered {len(consensus_peaks) - len(consensus_peaks_filtered)} consensus regions (not within chromosomes)",
)
return consensus_peaks_filtered

return consensus_peaks


def _create_temp_bed_file(
consensus_peaks: pd.DataFrame, target_region_width: int
) -> str:
# Adjust regions based on target_region_width
"""Adjust consensus regions to a target width and create a temporary BED file."""
adjusted_peaks = consensus_peaks.copy()
adjusted_peaks[1] = adjusted_peaks.apply(
lambda row: max(0, row[1] - (target_region_width - (row[2] - row[1])) // 2),
Expand Down Expand Up @@ -265,10 +270,10 @@ def import_topics(
def import_bigwigs(
bigwigs_folder: PathLike,
regions_file: PathLike,
chromsizes_file: PathLike,
chromsizes_file: PathLike | None = None,
target: str = "mean",
target_region_width: int | None = None,
compress: bool = True,
compress: bool = False,
) -> AnnData:
"""
Import bigWig files and consensus regions BED file into AnnData format.
Expand Down

0 comments on commit b1420e0

Please sign in to comment.