switching laptops

BiocPy · Oct 17, 2023 · 4c903c0 · 4c903c0
1 parent c9ba493
commit 4c903c0
Show file tree

Hide file tree

Showing 5 changed files with 87 additions and 57 deletions.
diff --git a/README.md b/README.md
@@ -27,9 +27,9 @@ You can easily access UCSC genomes or load a genome annotation from a GTF file u
 ```python
 import genomicranges
 
-gr = genomicranges.from_gtf(<PATH TO GTF>)
+gr = genomicranges.parse_gtf(<PATH TO GTF>)
 # OR
-gr = genomicranges.from_ucsc(genome="hg19")
+gr = genomicranges.read_ucsc(genome="hg19")
 ```
 #### Pandas DataFrame
 

diff --git a/setup.cfg b/setup.cfg
@@ -5,8 +5,8 @@
 
 [metadata]
 name = GenomicRanges
-description = Container class to represent genomic locations and support genomic analysis.
-author = jkanche
+description = Container class to represent and operate over genomic regions and annotations.
+author = Jayaram Kancherla
 author_email = [email protected]
 license = MIT
 license_files = LICENSE.txt
@@ -50,9 +50,10 @@ package_dir =
 install_requires =
     importlib-metadata; python_version<"3.8"
     pandas
-    biocframe>=0.3.9
+    biocframe>=0.3.13
     numpy
-    prettytable
+    biocgenerics
+    biocutils
 
 [options.packages.find]
 where = src

diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py
@@ -14,9 +14,12 @@
 
 from biocframe import BiocFrame
 from biocframe.types import SlicerArgTypes
+from biocgenerics.combine import combine
+from biocgenerics.combine_cols import combine_cols
+from biocgenerics.combine_rows import combine_rows
+from biocutils import is_list_of_type
 from numpy import concatenate, count_nonzero, ndarray, sum, zeros
 from pandas import DataFrame, concat, isna
-from prettytable import PrettyTable
 
 from .interval import (
     OVERLAP_QUERY_TYPES,
@@ -42,7 +45,7 @@
 
 
 class GenomicRanges(BiocFrame):
-    """`GenomicRanges` provides functionality to represent and operate over genomic regions and annotations.
+    """`GenomicRanges` provides a container class to represent and operate over genomic regions and annotations.
 
     **Note: Intervals are inclusive on both ends and start at 1.**
 
@@ -410,47 +413,66 @@ def mcols(self, return_type: Optional[Callable] = None) -> Any:
                 raise ValueError(f"{return_type} not supported, {str(e)}")
 
     def __repr__(self) -> str:
-        table = PrettyTable(padding_width=2)
-        table.field_names = [str(col) for col in self.column_names]
+        from io import StringIO
+
+        from rich.console import Console
+        from rich.table import Table
+
+        table = Table(
+            title=f"GenomicRanges with {self.dims[0]} intervals & {self.dims[1] - 4} metadata columns",
+            show_header=True,
+        )
+        if self.row_names is not None:
+            table.add_column("row_names")
+
+        for col in self.column_names:
+            table.add_column(f"{str(col)} [italic]<{type(self.column(col)).__name__}>")
 
         _rows = []
         rows_to_show = 2
         _top = self.shape[0]
         if _top > rows_to_show:
             _top = rows_to_show
 
-        # top three rows
+        # top two rows
         for r in range(_top):
             _row = self.row(r)
             vals = list(_row.values())
             res = [str(v) for v in vals]
+            if self.row_names:
+                res = [str(self.row_names[r])] + res
             _rows.append(res)
 
         if self.shape[0] > 2 * rows_to_show:
             # add ...
-            _rows.append(["..." for _ in range(len(self.column_names))])
+            _dots = []
+            if self.row_names:
+                _dots = ["..."]
+
+            _dots.extend(["..." for _ in range(len(self.column_names))])
+            _rows.append(_dots)
 
-        _last = self.shape[0] - rows_to_show
+        _last = self.shape[0] - _top
         if _last <= rows_to_show:
             _last = self.shape[0] - _top
 
-        # last three rows
-        for r in range(_last, len(self)):
+        # last set of rows
+        for r in range(_last + 1, len(self)):
             _row = self.row(r)
             vals = list(_row.values())
             res = [str(v) for v in vals]
+            if self.row_names:
+                res = [str(self.row_names[r])] + res
             _rows.append(res)
 
-        table.add_rows(_rows)
+        for _row in _rows:
+            table.add_row(*_row)
 
-        pattern = (
-            f"Class GenomicRanges with {self.dims[0]} intervals and "
-            f"{self.dims[1] - 4} metadata columns \n"
-            f"contains row names?: {self.row_names is not None} \n"
-            f"{table.get_string()}"
-        )
+        console = Console(file=StringIO())
+        with console.capture() as capture:
+            console.print(table)
 
-        return pattern
+        return capture.get()
 
     # for documentation, otherwise serves no real use.
     def __getitem__(self, args: SlicerArgTypes) -> Union["GenomicRanges", dict, list]:
@@ -2196,46 +2218,24 @@ def invert_strand(self) -> "GenomicRanges":
             metadata=self.metadata,
         )
 
-    def concat(self, *granges: "GenomicRanges") -> "GenomicRanges":
-        """Row-wise concatenate multiple `GenomicRanges` objects.
+    def combine(self, *other: "GenomicRanges") -> "GenomicRanges":
+        """Combine multiple GenomicRanges objects by row.
+
+        Note: Fills missing columns with an array of `None`s.
 
         Args:
-            granges (GenomicRanges): Objects to concatenate.
+            *other (GenomicRanges): GenomicRanges objects.
 
         Raises:
-            TypeError: If any ``granges`` are not "GenomicRanges".
+            TypeError: If all objects are not of type GenomicRanges.
 
         Returns:
-            GenomicRanges: A new concatenated `GenomicRanges` object.
+            BiocFrame: A combined BiocFrame.
         """
-        all_granges = [isinstance(gr, GenomicRanges) for gr in granges]
-
-        if not all(all_granges):
-            raise TypeError("all provided objects are not GenomicRanges objects")
-
-        all_columns = [gr.column_names for gr in granges]
-        all_columns.append(self.column_names)
-        all_unique_columns = list(
-            set([item for sublist in all_columns for item in sublist])
-        )
-
-        new_data = OrderedDict()
-        for col in all_unique_columns:
-            if col not in new_data:
-                new_data[col] = []
-
-            for gr in granges:
-                if col in gr.column_names:
-                    new_data[col].extend(gr.column(col))
-                else:
-                    new_data[col].extend([None] * len(gr))
-
-            if col in self.column_names:
-                new_data[col].extend(self.column(col))
-            else:
-                new_data[col].extend([None] * len(self))
+        if not is_list_of_type(other, GenomicRanges):
+            raise TypeError("All objects to combine must be GenomicRanges objects.")
 
-        return GenomicRanges(new_data, column_names=all_unique_columns)
+        return super().combine(*other)
 
     @classmethod
     def empty(cls):
@@ -2245,3 +2245,32 @@ def empty(cls):
             same type as caller, in this case a `GenomicRanges`.
         """
         return cls(number_of_rows=0)
+
+
+@combine.register(GenomicRanges)
+def _combine_gr(*x: GenomicRanges):
+    if not is_list_of_type(x, GenomicRanges):
+        raise ValueError("All elements to `combine` must be `GenomicRanges` objects.")
+    return x[0].combine(*x[1:])
+
+
+@combine_rows.register(GenomicRanges)
+def _combine_rows_gr(*x: GenomicRanges):
+    if not is_list_of_type(x, GenomicRanges):
+        raise ValueError(
+            "All elements to `combine_rows` must be `GenomicRanges` objects."
+        )
+
+    return x[0].combine(*x[1:])
+
+
+@combine_cols.register(GenomicRanges)
+def _combine_cols_gr(*x: GenomicRanges):
+    if not is_list_of_type(x, GenomicRanges):
+        raise ValueError(
+            "All elements to `combine_cols` must be `GenomicRanges` objects."
+        )
+
+    raise NotImplementedError(
+        "`combine_cols` is not implemented for `GenomicRanges` objects."
+    )
diff --git a/src/genomicranges/io/pdf.py b/src/genomicranges/io/pdf.py
@@ -17,7 +17,7 @@ def from_pandas(data: DataFrame) -> "GenomicRanges":
             Must contain 'seqnames', 'starts' & 'ends' columns.
 
     Returns:
-        GenomicRanges: object.
+        GenomicRanges: Object representing intervals.
     """
 
     from ..GenomicRanges import GenomicRanges

diff --git a/tests/test_gr_misc.py b/tests/test_gr_misc.py
@@ -66,7 +66,7 @@ def test_concat():
     g_tgt = genomicranges.from_pandas(df_tgt)
     assert g_tgt is not None
 
-    result = g_src.concat(g_tgt)
+    result = g_src.combine(g_tgt)
 
     assert result is not None
     assert result.shape[0] == 15