Merge pull request #55 from ygidtu/dev

update to v0.0.7
ygidtu · Nov 18, 2022 · 62db8e2 · 62db8e2
2 parents 69f6ce8 + 04486e5
commit 62db8e2
Show file tree

Hide file tree

Showing 11 changed files with 429 additions and 415 deletions.
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 
 ![](example/diagram.png)
 
-[See more example](https://sashimi.readthedocs.io/en/latest/)
+[Tutorials](https://sashimi.readthedocs.io/en/latest/)
 
 ## what is sashimi.py
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,51 +1,47 @@
 asciitree>=0.3.3
 cairocffi>=1.4.0
-certifi>=2022.9.24
+certifi>=2022.9.24 ; python_version >= '3.6'
 cffi>=1.15.1
-charset-normalizer>=2.1.1
+charset-normalizer>=2.1.1 ; python_full_version >= '3.6.0'
 click>=8.1.3
-click-option-group>=0.5.3
-contourpy>=1.0.5
+click-option-group>=0.5.5
+contourpy>=1.0.6 ; python_version >= '3.7'
 cooler>=0.8.11
-cycler>=0.11.0
+cycler>=0.11.0 ; python_version >= '3.6'
 cytoolz>=0.10.1
-dill>=0.3.5.1
-filetype>=1.1.0
-fonttools>=4.37.3
-h5py>=3.7.0
+dill>=0.3.6 ; python_version >= '3.7'
+filetype>=1.2.0
+fonttools>=4.38.0 ; python_version >= '3.7'
+h5py>=3.7.0 ; python_version >= '3.7'
 hicmatrix>=15
-idna>=3.4
+idna>=3.4 ; python_version >= '3.5'
 intervaltree>=3.1.0
-kiwisolver>=1.4.4
+kiwisolver>=1.4.4 ; python_version >= '3.7'
 loguru>=0.6.0
-matplotlib>=3.6.0
-multiprocess>=0.70.13
-numexpr>=2.8.3
-numpy>=1.23.3
-packaging>=21.3
-pandas>=1.5.0
-pillow>=9.2.0
+matplotlib>=3.6.2
+multiprocess>=0.70.14 ; python_version >= '3.7'
+numexpr>=2.8.4 ; python_version >= '3.7'
+numpy>=1.23.4
+packaging>=21.3 ; python_version >= '3.6'
+pandas>=1.5.1
+pillow>=9.3.0 ; python_version >= '3.7'
 pybigwig>=0.3.18
 pycparser>=2.21
 pyfaidx>=0.7.1
 pypairix>=0.3.7
-pyparsing>=3.0.9
-pysam>=0.19.1
-python-dateutil>=2.8.2
-pytz>=2022.2.1
-pyyaml>=6.0
+pyparsing>=3.0.9 ; python_full_version >= '3.6.8'
+pysam>=0.20.0
+python-dateutil>=2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+pytz>=2022.6
+pyyaml>=6.0 ; python_version >= '3.6'
 requests>=2.28.1
-scipy>=1.9.1
-seaborn>=0.12.0
-setuptools>=65.4.0
-simplejson>=3.17.6
-six>=1.16.0
+scipy>=1.9.3
+seaborn>=0.12.1
+setuptools>=65.5.1 ; python_version >= '3.7'
+simplejson>=3.18.0 ; python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3'
+six>=1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
 sortedcontainers>=2.4.0
-tables>=3.7.0
-toolz>=0.12.0
-urllib3>=1.26.12
+tables>=3.7.0 ; python_version >= '3.6'
+toolz>=0.12.0 ; python_version >= '3.5'
+urllib3>=1.26.12 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'
 xmltodict>=0.13.0
-
-uvicorn~=0.18.2
-fastapi~=0.79.0
-pydantic~=1.9.1
diff --git a/sashimi/base/ReadDepth.py b/sashimi/base/ReadDepth.py
@@ -47,22 +47,25 @@ def __init__(self,
         self.junctions_dict = junctions_dict
         self.strand_aware = strand_aware
         self.minus = abs(minus) if minus is not None else minus
-        self.max = max(self.wiggle, default=0)
         self.junction_dict_plus = junction_dict_plus
         self.junction_dict_minus = junction_dict_minus
         self.site_plus = site_plus
         self.site_minus = site_minus * -1 if site_minus is not None else site_minus
 
     @property
     def wiggle(self) -> np.array:
-        if (self.plus is None or np.sum(self.plus) == 0) and self.minus is not None:
+        if (self.plus is None or not self.plus.any()) and self.minus is not None:
             return self.minus
 
         if self.plus is not None and self.minus is not None:
             return self.plus + self.minus
 
         return self.plus
 
+    @property
+    def max(self) -> float:
+        return max(self.wiggle, default=0)
+
     def __add__(self, other):
 
         """
@@ -74,28 +77,33 @@ def __add__(self, other):
                 A new ReadDepth object containing the sum of the two original ReadDepth objects
         """
 
-        if len(self.wiggle) == len(other.wiggle):
-            junctions = self.junctions_dict if self.junctions_dict else {}
-            if other.junctions_dict:
-                for i, j in other.junctions_dict.items():
-                    if i in junctions.keys():
-                        junctions[i] += j
-                    else:
-                        junctions[i] = j
-
-            minus = None
-            if self.minus is not None and other.minus is not None:
-                minus = self.minus + other.minus
-            elif self.minus is None and other.minus is not None:
-                minus = other.minus
-            elif self.minus is not None and other.minus is None:
-                minus = self.minus
-
-            return ReadDepth(
-                self.plus + other.plus,
-                junctions_dict=junctions,
-                minus=minus
-            )
+        if self.wiggle is not None and other.wiggle is not None:
+            if len(self.wiggle) == len(other.wiggle):
+                junctions = self.junctions_dict if self.junctions_dict else {}
+                if other.junctions_dict:
+                    for i, j in other.junctions_dict.items():
+                        if i in junctions.keys():
+                            junctions[i] += j
+                        else:
+                            junctions[i] = j
+
+                minus = None
+                if self.minus is not None and other.minus is not None:
+                    minus = self.minus + other.minus
+                elif self.minus is None and other.minus is not None:
+                    minus = other.minus
+                elif self.minus is not None and other.minus is None:
+                    minus = self.minus
+
+                return ReadDepth(
+                    self.plus + other.plus,
+                    junctions_dict=junctions,
+                    minus=minus
+                )
+        elif self.wiggle is None:
+            return other
+        else:
+            return self
 
     def curr_height(self, pos: int) -> float:
         if self.minus is None:

diff --git a/sashimi/cli.py b/sashimi/cli.py
@@ -21,7 +21,7 @@
 from sashimi.file.ATAC import ATAC
 from sashimi.plot import Plot
 
-__version__ = "0.0.6"
+__version__ = "0.0.7"
 __author__ = "ygidtu & Ran Zhou"
 __email__ = "[email protected]"
 
@@ -49,6 +49,9 @@ def __init__(self,
                  library: str = "fru",
                  trans: Optional[str] = None,
                  depth: int = 30000):
+
+        if path.startswith("~"):
+            path = os.path.expanduser(path)
         self.path = os.path.abspath(path)
 
         if not os.path.exists(self.path):
@@ -63,6 +66,10 @@ def __init__(self,
         self.trans = trans
         self.depth = depth
 
+    @property
+    def name(self) -> str:
+        return os.path.basename(self.path)
+
     def __str__(self):
         return f"path: {self.path} \nlabel: {self.label} \ngroup: {self.group} \n" \
                f"color: {self.color} \ncategory: {self.category} \nlibrary: {self.library}"
@@ -114,7 +121,7 @@ def __read_iter__(path):
 
 def process_file_list(infile: str, category: str = "density"):
     u"""
-    Process and check the file list format
+    Process and check the file list format_
     :param infile: path to input file list
     :param category: the image type of file list used for
     """
@@ -235,7 +242,7 @@ def process_file_list(infile: str, category: str = "density"):
                  show_default=True)
 @optgroup.option("--barcode", type=click.Path(exists=True), show_default=True,
                  help="Path to barcode list file, At list two columns were required, "
-                      "- 1st The name of bam file; \b"
+                      "- 1st The name of bam file, not the alias of bam; \b"
                       "- 2nd the barcode; \b"
                       "- 3rd The group label, optional; \b"
                       "- 4th The color of each cell type, default using the color of corresponding bam file.\n")
@@ -519,8 +526,8 @@ def main(**kwargs):
                     p.add_interval(f.path, f.label)
             elif key == "density":
                 for f in process_file_list(kwargs[key], key):
-                    if barcodes and f.label in barcodes.keys() and f.category in ["bam", "atac"]:
-                        for group in barcodes[f.label].keys():
+                    if barcodes and f.name in barcodes.keys() and f.category in ["bam", "atac"]:
+                        for group in barcodes[f.name].keys():
                             if kwargs["group_by_cell"] and group:
                                 label = group
                             elif group:
@@ -530,13 +537,13 @@ def main(**kwargs):
 
                             if f.label not in size_factors.keys() and f.category == "atac":
                                 logger.info(f"Indexing {f.path}")
-                                size_factors[f.label] = ATAC.index(f.path, barcodes[f.label])
+                                size_factors[f.label] = ATAC.index(f.path, barcodes[f.name])
 
                             p.add_density(f.path,
                                           category=f.category,
                                           label=label,
                                           barcode=group,
-                                          barcode_groups=barcodes[f.label],
+                                          barcode_groups=barcodes[f.name],
                                           barcode_tag=kwargs["barcode_tag"],
                                           umi_tag=kwargs["umi_tag"],
                                           library=f.library,
@@ -568,17 +575,17 @@ def main(**kwargs):
                                       density_by_strand=kwargs["density_by_strand"],)
             elif key == "heatmap":
                 for f in process_file_list(kwargs[key], key):
-                    if barcodes and f.label in barcodes.keys() and f.category in ["bam", "atac"]:
+                    if barcodes and f.name in barcodes.keys() and f.category in ["bam", "atac"]:
                         if f.label not in size_factors.keys() and f.category == "atac":
                             logger.info(f"Indexing {f.path}")
-                            size_factors[f.label] = ATAC.index(f.path, barcodes[f.label])
+                            size_factors[f.label] = ATAC.index(f.path, barcodes[f.name])
 
-                        for group in barcodes[f.label].keys():
+                        for group in barcodes[f.name].keys():
                             p.add_heatmap(f.path,
                                           category=f.category,
                                           label=f"{f.label} - {group}" if group else f.label,
                                           barcode=group,
-                                          barcode_groups=barcodes[f.label],
+                                          barcode_groups=barcodes[f.name],
                                           group=f"{f.group} - {group}" if f.group else f.group,
                                           barcode_tag=kwargs["barcode_tag"],
                                           size_factor=size_factors.get(f.label),
@@ -613,8 +620,8 @@ def main(**kwargs):
                                       vmax=kwargs["heatmap_vmax"])
             elif key == "line":
                 for f in process_file_list(kwargs[key], key):
-                    if barcodes and f.label in barcodes.keys() and f.category == "bam":
-                        for group in barcodes[f.label].keys():
+                    if barcodes and f.name in barcodes.keys() and f.category == "bam":
+                        for group in barcodes[f.name].keys():
                             if kwargs["group_by_cell"] and group:
                                 label = group
                             elif group:
@@ -732,7 +739,8 @@ def main(**kwargs):
             "density": kwargs["sc_density_height_ratio"]
         },
         distance_between_label_axis=kwargs["distance_ratio"],
-        included_junctions=included_junctions
+        included_junctions=included_junctions,
+        n_jobs=kwargs.get("process", 1)
     )
 
 

diff --git a/sashimi/file/File.py b/sashimi/file/File.py
@@ -38,6 +38,9 @@ def end(self) -> int:
     def load(self, *args, **kwargs):
         return None
 
+    def len(self, scale=1) -> int:
+        return len(self.data) / scale if self.data else 0
+
     def __hash__(self) -> int:
         return hash((self.path, self.label, self.title))
 

diff --git a/sashimi/file/Reference.py b/sashimi/file/Reference.py
@@ -13,7 +13,6 @@
 from typing import List, Union, Optional
 
 import filetype
-import matplotlib as mpl
 import pysam
 from loguru import logger
 
@@ -22,7 +21,6 @@
 from sashimi.base.Readder import Reader
 from sashimi.base.Transcript import Transcript
 from sashimi.file.File import File
-from sashimi.base.CoordinateMap import Coordinate
 
 
 class Reference(File):
@@ -133,9 +131,7 @@ def __load_local_domain__(self, region: GenomicLoci):
                 for record in Reader.read_bigbed(bb_file, region):
                     record = record[2].split("\t")
                     current_id = record[0]
-                    strand = record[2]
                     current_start = int(record[3])
-                    num_of_chunk = record[6]
                     block_sizes = [int(x) for x in record[7].split(",") if x]
                     block_starts = [int(x) for x in record[8].split(",") if x]
                     current_desc = record[17]
@@ -307,7 +303,6 @@ def index_gtf(cls, input_gtf):
         gtf = cls.is_gtf(input_gtf)
         assert gtf % 10 == 1, f"{input_gtf} seems not be gtf format"
 
-        index = False
         if gtf // 10 > 0:
             output_gtf = input_gtf
         else:
@@ -369,11 +364,10 @@ def __load_gtf__(self, region: GenomicLoci) -> List[Transcript]:
                         strand=rec.strand,
                         transcript_id=rec.transcript_id,
                         gene_id=rec.gene_id,
-                        gene=rec.gene_name,
-                        transcript=rec.transcript_name,
+                        gene=rec.gene_name if "gene_name" in rec.attributes else "",
+                        transcript=rec.transcript_name if "transcript_name" in rec.attributes else "",
                         exons=[]
                     )
-
             elif re.search(r"(exon)", rec.feature, re.I):
                 if rec.transcript_id not in exons.keys():
                     exons[rec.transcript_id] = []
@@ -444,12 +438,11 @@ def __load_bam__(self, region: GenomicLoci, threshold_of_reads: int = 0) -> List
 
         return sorted([x for x, y in transcripts.items() if y > threshold_of_reads])
 
-    def __load_bed__(self, region: GenomicLoci)-> List[Transcript]:
+    def __load_bed__(self, region: GenomicLoci) -> List[Transcript]:
         transcripts = []
         try:
             for rec in Reader.read_gtf(self.path, region=region, bed=True):
                 exon_bound = []
-                intron_bound = []
                 current_start = int(rec[1])
                 current_end = int(rec[2])
                 if len(rec) > 3:
@@ -525,6 +518,7 @@ def load(self, region: GenomicLoci, threshold_of_reads: int = 0, **kwargs):
         elif self.category == "bed":
             self.data = self.__load_bed__(region)
 
+        rec, start, end, strand = None, None, None, None
         for interval_file, interval_label in self.interval_file.items():
             try:
                 if not os.path.exists(interval_file + ".tbi"):
@@ -562,7 +556,7 @@ def load(self, region: GenomicLoci, threshold_of_reads: int = 0, **kwargs):
                         )
                     )
 
-                if len(interval_target) != 0:
+                if len(interval_target) != 0 and rec is not None:
                     self.data.append(Transcript(
                         chromosome=rec.contig,
                         start=start,