Skip to content

Commit

Permalink
Merge pull request #55 from ygidtu/dev
Browse files Browse the repository at this point in the history
update to v0.0.7
  • Loading branch information
ygidtu authored Nov 18, 2022
2 parents 69f6ce8 + 04486e5 commit 62db8e2
Show file tree
Hide file tree
Showing 11 changed files with 429 additions and 415 deletions.
646 changes: 323 additions & 323 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

![](example/diagram.png)

[See more example](https://sashimi.readthedocs.io/en/latest/)
[Tutorials](https://sashimi.readthedocs.io/en/latest/)

## what is sashimi.py

Expand Down
66 changes: 31 additions & 35 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,51 +1,47 @@
asciitree>=0.3.3
cairocffi>=1.4.0
certifi>=2022.9.24
certifi>=2022.9.24 ; python_version >= '3.6'
cffi>=1.15.1
charset-normalizer>=2.1.1
charset-normalizer>=2.1.1 ; python_full_version >= '3.6.0'
click>=8.1.3
click-option-group>=0.5.3
contourpy>=1.0.5
click-option-group>=0.5.5
contourpy>=1.0.6 ; python_version >= '3.7'
cooler>=0.8.11
cycler>=0.11.0
cycler>=0.11.0 ; python_version >= '3.6'
cytoolz>=0.10.1
dill>=0.3.5.1
filetype>=1.1.0
fonttools>=4.37.3
h5py>=3.7.0
dill>=0.3.6 ; python_version >= '3.7'
filetype>=1.2.0
fonttools>=4.38.0 ; python_version >= '3.7'
h5py>=3.7.0 ; python_version >= '3.7'
hicmatrix>=15
idna>=3.4
idna>=3.4 ; python_version >= '3.5'
intervaltree>=3.1.0
kiwisolver>=1.4.4
kiwisolver>=1.4.4 ; python_version >= '3.7'
loguru>=0.6.0
matplotlib>=3.6.0
multiprocess>=0.70.13
numexpr>=2.8.3
numpy>=1.23.3
packaging>=21.3
pandas>=1.5.0
pillow>=9.2.0
matplotlib>=3.6.2
multiprocess>=0.70.14 ; python_version >= '3.7'
numexpr>=2.8.4 ; python_version >= '3.7'
numpy>=1.23.4
packaging>=21.3 ; python_version >= '3.6'
pandas>=1.5.1
pillow>=9.3.0 ; python_version >= '3.7'
pybigwig>=0.3.18
pycparser>=2.21
pyfaidx>=0.7.1
pypairix>=0.3.7
pyparsing>=3.0.9
pysam>=0.19.1
python-dateutil>=2.8.2
pytz>=2022.2.1
pyyaml>=6.0
pyparsing>=3.0.9 ; python_full_version >= '3.6.8'
pysam>=0.20.0
python-dateutil>=2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytz>=2022.6
pyyaml>=6.0 ; python_version >= '3.6'
requests>=2.28.1
scipy>=1.9.1
seaborn>=0.12.0
setuptools>=65.4.0
simplejson>=3.17.6
six>=1.16.0
scipy>=1.9.3
seaborn>=0.12.1
setuptools>=65.5.1 ; python_version >= '3.7'
simplejson>=3.18.0 ; python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3'
six>=1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
sortedcontainers>=2.4.0
tables>=3.7.0
toolz>=0.12.0
urllib3>=1.26.12
tables>=3.7.0 ; python_version >= '3.6'
toolz>=0.12.0 ; python_version >= '3.5'
urllib3>=1.26.12 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'
xmltodict>=0.13.0

uvicorn~=0.18.2
fastapi~=0.79.0
pydantic~=1.9.1
56 changes: 32 additions & 24 deletions sashimi/base/ReadDepth.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,25 @@ def __init__(self,
self.junctions_dict = junctions_dict
self.strand_aware = strand_aware
self.minus = abs(minus) if minus is not None else minus
self.max = max(self.wiggle, default=0)
self.junction_dict_plus = junction_dict_plus
self.junction_dict_minus = junction_dict_minus
self.site_plus = site_plus
self.site_minus = site_minus * -1 if site_minus is not None else site_minus

@property
def wiggle(self) -> np.array:
if (self.plus is None or np.sum(self.plus) == 0) and self.minus is not None:
if (self.plus is None or not self.plus.any()) and self.minus is not None:
return self.minus

if self.plus is not None and self.minus is not None:
return self.plus + self.minus

return self.plus

@property
def max(self) -> float:
    """
    Highest value of the combined coverage track.

    :return: the largest element of ``self.wiggle``; 0 when the track is
             empty or when ``self.wiggle`` is None
    """
    signal = self.wiggle
    # wiggle hands back self.plus untouched, which may be None when no strand
    # carries data; the builtin max() raises TypeError on None, so fall back
    # to the same default used for an empty track
    if signal is None:
        return 0
    return max(signal, default=0)

def __add__(self, other):

"""
Expand All @@ -74,28 +77,33 @@ def __add__(self, other):
A new ReadDepth object containing the sum of the two original ReadDepth objects
"""

if len(self.wiggle) == len(other.wiggle):
junctions = self.junctions_dict if self.junctions_dict else {}
if other.junctions_dict:
for i, j in other.junctions_dict.items():
if i in junctions.keys():
junctions[i] += j
else:
junctions[i] = j

minus = None
if self.minus is not None and other.minus is not None:
minus = self.minus + other.minus
elif self.minus is None and other.minus is not None:
minus = other.minus
elif self.minus is not None and other.minus is None:
minus = self.minus

return ReadDepth(
self.plus + other.plus,
junctions_dict=junctions,
minus=minus
)
if self.wiggle is not None and other.wiggle is not None:
if len(self.wiggle) == len(other.wiggle):
junctions = self.junctions_dict if self.junctions_dict else {}
if other.junctions_dict:
for i, j in other.junctions_dict.items():
if i in junctions.keys():
junctions[i] += j
else:
junctions[i] = j

minus = None
if self.minus is not None and other.minus is not None:
minus = self.minus + other.minus
elif self.minus is None and other.minus is not None:
minus = other.minus
elif self.minus is not None and other.minus is None:
minus = self.minus

return ReadDepth(
self.plus + other.plus,
junctions_dict=junctions,
minus=minus
)
elif self.wiggle is None:
return other
else:
return self

def curr_height(self, pos: int) -> float:
if self.minus is None:
Expand Down
36 changes: 22 additions & 14 deletions sashimi/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sashimi.file.ATAC import ATAC
from sashimi.plot import Plot

__version__ = "0.0.6"
__version__ = "0.0.7"
__author__ = "ygidtu & Ran Zhou"
__email__ = "[email protected]"

Expand Down Expand Up @@ -49,6 +49,9 @@ def __init__(self,
library: str = "fru",
trans: Optional[str] = None,
depth: int = 30000):

if path.startswith("~"):
path = os.path.expanduser(path)
self.path = os.path.abspath(path)

if not os.path.exists(self.path):
Expand All @@ -63,6 +66,10 @@ def __init__(self,
self.trans = trans
self.depth = depth

@property
def name(self) -> str:
    """
    File name of the input, i.e. the final component of ``self.path``.
    """
    _, file_name = os.path.split(self.path)
    return file_name

def __str__(self):
return f"path: {self.path} \nlabel: {self.label} \ngroup: {self.group} \n" \
f"color: {self.color} \ncategory: {self.category} \nlibrary: {self.library}"
Expand Down Expand Up @@ -114,7 +121,7 @@ def __read_iter__(path):

def process_file_list(infile: str, category: str = "density"):
u"""
Process and check the file list format
Process and check the file list format
:param infile: path to input file list
:param category: the image type of file list used for
"""
Expand Down Expand Up @@ -235,7 +242,7 @@ def process_file_list(infile: str, category: str = "density"):
show_default=True)
@optgroup.option("--barcode", type=click.Path(exists=True), show_default=True,
help="Path to barcode list file, At list two columns were required, "
"- 1st The name of bam file; \b"
"- 1st The name of bam file, not the alias of bam; \b"
"- 2nd the barcode; \b"
"- 3rd The group label, optional; \b"
"- 4th The color of each cell type, default using the color of corresponding bam file.\n")
Expand Down Expand Up @@ -519,8 +526,8 @@ def main(**kwargs):
p.add_interval(f.path, f.label)
elif key == "density":
for f in process_file_list(kwargs[key], key):
if barcodes and f.label in barcodes.keys() and f.category in ["bam", "atac"]:
for group in barcodes[f.label].keys():
if barcodes and f.name in barcodes.keys() and f.category in ["bam", "atac"]:
for group in barcodes[f.name].keys():
if kwargs["group_by_cell"] and group:
label = group
elif group:
Expand All @@ -530,13 +537,13 @@ def main(**kwargs):

if f.label not in size_factors.keys() and f.category == "atac":
logger.info(f"Indexing {f.path}")
size_factors[f.label] = ATAC.index(f.path, barcodes[f.label])
size_factors[f.label] = ATAC.index(f.path, barcodes[f.name])

p.add_density(f.path,
category=f.category,
label=label,
barcode=group,
barcode_groups=barcodes[f.label],
barcode_groups=barcodes[f.name],
barcode_tag=kwargs["barcode_tag"],
umi_tag=kwargs["umi_tag"],
library=f.library,
Expand Down Expand Up @@ -568,17 +575,17 @@ def main(**kwargs):
density_by_strand=kwargs["density_by_strand"],)
elif key == "heatmap":
for f in process_file_list(kwargs[key], key):
if barcodes and f.label in barcodes.keys() and f.category in ["bam", "atac"]:
if barcodes and f.name in barcodes.keys() and f.category in ["bam", "atac"]:
if f.label not in size_factors.keys() and f.category == "atac":
logger.info(f"Indexing {f.path}")
size_factors[f.label] = ATAC.index(f.path, barcodes[f.label])
size_factors[f.label] = ATAC.index(f.path, barcodes[f.name])

for group in barcodes[f.label].keys():
for group in barcodes[f.name].keys():
p.add_heatmap(f.path,
category=f.category,
label=f"{f.label} - {group}" if group else f.label,
barcode=group,
barcode_groups=barcodes[f.label],
barcode_groups=barcodes[f.name],
group=f"{f.group} - {group}" if f.group else f.group,
barcode_tag=kwargs["barcode_tag"],
size_factor=size_factors.get(f.label),
Expand Down Expand Up @@ -613,8 +620,8 @@ def main(**kwargs):
vmax=kwargs["heatmap_vmax"])
elif key == "line":
for f in process_file_list(kwargs[key], key):
if barcodes and f.label in barcodes.keys() and f.category == "bam":
for group in barcodes[f.label].keys():
if barcodes and f.name in barcodes.keys() and f.category == "bam":
for group in barcodes[f.name].keys():
if kwargs["group_by_cell"] and group:
label = group
elif group:
Expand Down Expand Up @@ -732,7 +739,8 @@ def main(**kwargs):
"density": kwargs["sc_density_height_ratio"]
},
distance_between_label_axis=kwargs["distance_ratio"],
included_junctions=included_junctions
included_junctions=included_junctions,
n_jobs=kwargs.get("process", 1)
)


Expand Down
3 changes: 3 additions & 0 deletions sashimi/file/File.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def end(self) -> int:
def load(self, *args, **kwargs):
return None

def len(self, scale=1) -> float:
    """
    Number of loaded records divided by ``scale``.

    :param scale: divisor applied to the record count
    :return: ``len(self.data) / scale`` (true division, so a float),
             or 0 when ``self.data`` is falsy (None or empty)
    """
    # NOTE(review): truthiness is used to detect "nothing loaded"; this
    # presumes self.data is None or a plain container -- confirm for subclasses
    if not self.data:
        return 0
    return len(self.data) / scale

def __hash__(self) -> int:
return hash((self.path, self.label, self.title))

Expand Down
16 changes: 5 additions & 11 deletions sashimi/file/Reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import List, Union, Optional

import filetype
import matplotlib as mpl
import pysam
from loguru import logger

Expand All @@ -22,7 +21,6 @@
from sashimi.base.Readder import Reader
from sashimi.base.Transcript import Transcript
from sashimi.file.File import File
from sashimi.base.CoordinateMap import Coordinate


class Reference(File):
Expand Down Expand Up @@ -133,9 +131,7 @@ def __load_local_domain__(self, region: GenomicLoci):
for record in Reader.read_bigbed(bb_file, region):
record = record[2].split("\t")
current_id = record[0]
strand = record[2]
current_start = int(record[3])
num_of_chunk = record[6]
block_sizes = [int(x) for x in record[7].split(",") if x]
block_starts = [int(x) for x in record[8].split(",") if x]
current_desc = record[17]
Expand Down Expand Up @@ -307,7 +303,6 @@ def index_gtf(cls, input_gtf):
gtf = cls.is_gtf(input_gtf)
assert gtf % 10 == 1, f"{input_gtf} seems not be gtf format"

index = False
if gtf // 10 > 0:
output_gtf = input_gtf
else:
Expand Down Expand Up @@ -369,11 +364,10 @@ def __load_gtf__(self, region: GenomicLoci) -> List[Transcript]:
strand=rec.strand,
transcript_id=rec.transcript_id,
gene_id=rec.gene_id,
gene=rec.gene_name,
transcript=rec.transcript_name,
gene=rec.gene_name if "gene_name" in rec.attributes else "",
transcript=rec.transcript_name if "transcript_name" in rec.attributes else "",
exons=[]
)

elif re.search(r"(exon)", rec.feature, re.I):
if rec.transcript_id not in exons.keys():
exons[rec.transcript_id] = []
Expand Down Expand Up @@ -444,12 +438,11 @@ def __load_bam__(self, region: GenomicLoci, threshold_of_reads: int = 0) -> List

return sorted([x for x, y in transcripts.items() if y > threshold_of_reads])

def __load_bed__(self, region: GenomicLoci)-> List[Transcript]:
def __load_bed__(self, region: GenomicLoci) -> List[Transcript]:
transcripts = []
try:
for rec in Reader.read_gtf(self.path, region=region, bed=True):
exon_bound = []
intron_bound = []
current_start = int(rec[1])
current_end = int(rec[2])
if len(rec) > 3:
Expand Down Expand Up @@ -525,6 +518,7 @@ def load(self, region: GenomicLoci, threshold_of_reads: int = 0, **kwargs):
elif self.category == "bed":
self.data = self.__load_bed__(region)

rec, start, end, strand = None, None, None, None
for interval_file, interval_label in self.interval_file.items():
try:
if not os.path.exists(interval_file + ".tbi"):
Expand Down Expand Up @@ -562,7 +556,7 @@ def load(self, region: GenomicLoci, threshold_of_reads: int = 0, **kwargs):
)
)

if len(interval_target) != 0:
if len(interval_target) != 0 and rec is not None:
self.data.append(Transcript(
chromosome=rec.contig,
start=start,
Expand Down
Loading

0 comments on commit 62db8e2

Please sign in to comment.