Use logging.info for ingest/outgest prints #186

Merged: 5 commits, Jun 24, 2022
4 changes: 3 additions & 1 deletion apis/python/README-dev.md
@@ -1,6 +1,8 @@
* Most things are configured using GitHub Actions at `../../.github/workflows`
* Pre-push suggestions:
* `black .`
* `black . tools/[a-z]*`
* `isort . tools/[a-z]*`
* `flake8 . tools/[a-z]*`
* `python -m pytest tests`
* PyPI:
* https://pypi.org/project/tiledbsc/
2 changes: 1 addition & 1 deletion apis/python/doc/tiledb_array.md
@@ -10,7 +10,7 @@
class TileDBArray(TileDBObject)
```

Wraps arrays from TileDB-Py by retaining a URI, verbose flag, etc.
Wraps arrays from TileDB-Py by retaining a URI and convenience methods.
Also serves as an abstraction layer to hide TileDB-specific details from the API, unless
requested.

1 change: 0 additions & 1 deletion apis/python/doc/tiledb_object.md
@@ -22,7 +22,6 @@ def __init__(uri: str,
name: str,
parent=None,
soma_options: Optional[SOMAOptions] = None,
verbose: Optional[bool] = True,
ctx: Optional[tiledb.Ctx] = None)
```

4 changes: 1 addition & 3 deletions apis/python/examples/soco-slice-query.py
@@ -36,9 +36,7 @@ def soco_query_and_store(

if os.path.exists(output_soma_path):
shutil.rmtree(output_soma_path)
soma = tiledbsc.SOMA.from_soma_slice(
result_soma_slice, output_soma_path, verbose=False
)
soma = tiledbsc.SOMA.from_soma_slice(result_soma_slice, output_soma_path)
print("Wrote", output_soma_path, soma.X.data.shape())


36 changes: 19 additions & 17 deletions apis/python/examples/uniformizer.py
@@ -37,6 +37,7 @@
import scipy.stats
import tiledb

import tiledbsc.logging
from tiledbsc import SOMA, SOMACollection
from tiledbsc import io as SOMAio

@@ -50,11 +51,17 @@ def main() -> int:
args = parser.parse_args()

if args.verbose:
# tiledbsc.logging.logger.setLevel(logging.INFO)
# logger = logging.getLogger('tiledbsc')
# logger.setLevel(logging.INFO)
# logging.getLogger("tiledbsc").setLevel(logging.INFO)
# Not able to get any of the above to 'stick'. The following sets level for the whole app,
# not just the tiledbsc library, but that's an acceptable workaround since this CLI does
# nothing except invoke the tiledbsc library.
logging.basicConfig(level=logging.INFO)

uniformizer = Uniformizer(
atlas_uri=args.atlas_uri,
verbose=args.verbose,
)
if args.allow_non_primary_data:
uniformizer._allow_non_primary_data = True
@@ -148,7 +155,6 @@ class Uniformizer:

ctx: tiledb.Ctx
atlas_uri: str
verbose: bool

# You can adapt these to match your organization's schema
OBS_COLUMNS = [
@@ -175,11 +181,9 @@
def __init__(
self,
atlas_uri: str,
verbose: bool = False,
):
self.ctx = self._create_tiledb_ctx()
self.atlas_uri = atlas_uri
self.verbose = verbose
self._allow_non_primary_data = False

# ----------------------------------------------------------------
Expand All @@ -200,7 +204,7 @@ def add_h5ad(self, input_dataset_id: Optional[str], input_h5ad_path) -> int:
if soma_name in soco:
raise Exception(f"SOMA {soma_name} is already in SOMACollection {soco.uri}")

logging.info("Loading H5AD")
tiledbsc.logging.logger.info("Loading H5AD")
ann = anndata.read_h5ad(input_h5ad_path)

self._clean_and_add(ann, soma_name, soco)
@@ -215,7 +219,7 @@ def add_soma(self, input_dataset_id: Optional[str], input_soma_uri: str) -> int:
if soma_name in soco:
raise Exception(f"SOMA {soma_name} is already in SOMACollection {soco.uri}")

logging.info("Loading SOMA")
tiledbsc.logging.logger.info("Loading SOMA")
input_soma = SOMA(input_soma_uri)
ann = SOMAio.to_anndata(input_soma)

@@ -227,9 +231,7 @@ def _init_soco(self) -> SOMACollection:
"""
Makes sure the destination SOMACollection exists for first write.
"""
soco = SOMACollection(
self.atlas_uri, name="atlas", ctx=self.ctx, verbose=self.verbose
)
soco = SOMACollection(self.atlas_uri, name="atlas", ctx=self.ctx)
soco.create_unless_exists() # Must be done first, to create the parent directory
if not soco.exists():
raise Exception(f"Could not create SOCO at {soco.uri}")
@@ -254,24 +256,24 @@ def _clean_and_add(
Cleans and uniformizes the data (whether obtained from H5AD or SOMA), writes a new SOMA, adds an
X/rankit layer, and adds the new SOMA to the SOMACollection.
"""
logging.info("Cleaning data")
tiledbsc.logging.logger.info("Cleaning data")
ann = self._clean_and_uniformize(ann)

logging.info("Creating rankit")
tiledbsc.logging.logger.info("Creating rankit")
X_rankit = _rankit(ann.X)

logging.info("Saving SOMA")
tiledbsc.logging.logger.info("Saving SOMA")
soma_uri = f"{self.atlas_uri}/{soma_name}"
atlas_soma = SOMA(
uri=soma_uri, name=soma_name, verbose=self.verbose, ctx=self.ctx
)
atlas_soma = SOMA(uri=soma_uri, name=soma_name, ctx=self.ctx)
SOMAio.from_anndata(atlas_soma, ann)

logging.info(f"Adding SOMA name {atlas_soma.name} at SOMA URI {atlas_soma.uri}")
tiledbsc.logging.logger.info(
f"Adding SOMA name {atlas_soma.name} at SOMA URI {atlas_soma.uri}"
)
soco.add(atlas_soma)

# Create rankit X layer and save
logging.info("Saving rankit layer")
tiledbsc.logging.logger.info("Saving rankit layer")
if "rankit" in atlas_soma.X.keys():
raise Exception(
f"rankit layer already exists in the SOMA {atlas_soma.name} {atlas_soma.uri}"
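The commented-out attempts in `main()` above hint at why per-library configuration appeared not to "stick": until some handler is attached, Python's last-resort handler drops everything below WARNING, so raising the `tiledbsc` logger's level alone produces no visible output. A minimal sketch of scoping INFO output to this one library, assuming it logs through a logger named `"tiledbsc"` and attaches no handlers of its own:

```python
import logging

# Install a root handler first; without one, records below WARNING are
# swallowed by logging's last-resort handler regardless of logger levels.
logging.basicConfig()

# Raise only the library's logger; other loggers keep inheriting the root
# default (WARNING), so the rest of the application stays quiet.
logging.getLogger("tiledbsc").setLevel(logging.INFO)
```

With a handler in place, the per-library level takes effect, though `logging.basicConfig(level=logging.INFO)` remains a reasonable shortcut for a CLI that does nothing but invoke the library.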
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsc/__init__.py
@@ -31,6 +31,8 @@
except DistributionNotFound:
__version__ = "unknown"

import logging

from .annotation_matrix import AnnotationMatrix
from .annotation_matrix_group import AnnotationMatrixGroup
from .annotation_pairwise_matrix_group import AnnotationPairwiseMatrixGroup
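The `from .logging import logger` imports elsewhere in this diff refer to a module whose body is not shown here. For orientation, a minimal sketch of what such a `tiledbsc/logging.py` typically contains; everything beyond the `logger` name is an assumption:

```python
import logging

# One shared logger for the whole package, named after it so applications
# can address it with logging.getLogger("tiledbsc").
logger = logging.getLogger("tiledbsc")

# Library convention: attach only a NullHandler, leaving handler and level
# configuration to the embedding application.
logger.addHandler(logging.NullHandler())
```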
12 changes: 5 additions & 7 deletions apis/python/src/tiledbsc/annotation_dataframe.py
@@ -6,6 +6,7 @@

import tiledbsc.util as util

from .logging import logger
from .tiledb_array import TileDBArray
from .tiledb_group import TileDBGroup

@@ -191,9 +192,8 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
dim_filters = tiledb.FilterList([tiledb.ZstdFilter(level=-1)])
attr_filters = tiledb.FilterList([tiledb.ZstdFilter(level=-1)])

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START WRITING {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START WRITING {self.uri}")

# Make the row-names column (barcodes for obs, gene names for var) explicitly named.
# Otherwise it'll be called '__tiledb_rows'.
@@ -223,8 +223,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
mode = "ingest"
if self.exists():
mode = "append"
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")

# ISSUE:
# TileDB attributes can be stored as Unicode but they are not yet queryable via the TileDB
@@ -277,5 +276,4 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:

self._set_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
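One side note on the pattern adopted throughout this diff: an f-string argument is built even when INFO logging is disabled, whereas the logging module's %-style arguments defer formatting until a handler actually emits the record, which can matter on hot paths. A small illustration with a hypothetical URI:

```python
import logging

logger = logging.getLogger("tiledbsc")
uri = "s3://bucket/soma/obs"  # hypothetical value for illustration

logger.info(f"START WRITING {uri}")   # string is built even if INFO is off
logger.info("START WRITING %s", uri)  # formatted only if a handler emits it
```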
15 changes: 6 additions & 9 deletions apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import tiledbsc.util as util

from .logging import logger
from .tiledb_array import TileDBArray
from .tiledb_group import TileDBGroup

@@ -95,9 +96,8 @@ def from_matrix_and_dim_values(self, matrix, dim_values) -> None:
:param dim_values: anndata.obs_names, anndata.var_names, or anndata.raw.var_names.
"""

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START WRITING {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START WRITING {self.uri}")

if isinstance(matrix, pd.DataFrame):
self._from_pandas_dataframe(matrix, dim_values)
@@ -106,8 +106,7 @@

self._set_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))

# ----------------------------------------------------------------
def _numpy_ndarray_or_scipy_sparse_csr_matrix(self, matrix, dim_values) -> None:
@@ -119,8 +118,7 @@

# Ingest annotation matrices as 1D/multi-attribute sparse arrays
if self.exists():
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")
else:
self._create_empty_array([matrix.dtype] * nattr, attr_names)

@@ -133,8 +131,7 @@ def _from_pandas_dataframe(self, df, dim_values) -> None:

# Ingest annotation matrices as 1D/multi-attribute sparse arrays
if self.exists():
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")
else:
self._create_empty_array(list(df.dtypes), attr_names)

26 changes: 10 additions & 16 deletions apis/python/src/tiledbsc/annotation_matrix_group.py
@@ -8,6 +8,7 @@
import tiledbsc.util as util

from .annotation_matrix import AnnotationMatrix
from .logging import logger
from .tiledb_group import TileDBGroup


@@ -105,36 +106,29 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]:
if (
not self.exists()
): # Not all groups have all four of obsm, obsp, varm, and varp.
if self._verbose:
print(f"{self._indent}{self.uri} not found")
logger.info(f"{self._indent}{self.uri} not found")
return {}

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START read {self.uri}")

with self._open() as G:
matrices_in_group = {}
for element in G:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")
s2 = util.get_start_stamp()
logger.info(f"{self._indent}START read {element.uri}")

with tiledb.open(element.uri, ctx=self._ctx) as A:
df = pd.DataFrame(A[:])
df.set_index(self.dim_name, inplace=True)
matrix_name = os.path.basename(element.uri) # e.g. 'X_pca'
matrices_in_group[matrix_name] = df.to_numpy()

if self._verbose:
print(
util.format_elapsed(
s2, f"{self._indent}FINISH read {element.uri}"
)
)
logger.info(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))

return matrices_in_group

24 changes: 10 additions & 14 deletions apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py
@@ -8,6 +8,7 @@

from .annotation_dataframe import AnnotationDataFrame
from .assay_matrix import AssayMatrix
from .logging import logger
from .tiledb_group import TileDBGroup


@@ -137,34 +138,29 @@ def to_dict_of_csr(
except tiledb.TileDBError:
pass
if grp is None:
if self._verbose:
print(f"{self._indent}{self.uri} not found")
logger.info(f"{self._indent}{self.uri} not found")
return {}

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START read {self.uri}")

matrices_in_group = {}
for element in self:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")
s2 = util.get_start_stamp()
logger.info(f"{self._indent}START read {element.uri}")

matrix_name = os.path.basename(element.uri) # TODO: fix for tiledb cloud
matrices_in_group[matrix_name] = element.to_csr_matrix(
obs_df_index, var_df_index
)

if self._verbose:
print(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)
logger.info(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)

grp.close()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))

return matrices_in_group
