Use logging.info for ingest/outgest prints #186

Merged: 5 commits, Jun 24, 2022
4 changes: 3 additions & 1 deletion apis/python/README-dev.md
@@ -1,6 +1,8 @@
* Most things are configured using GitHub Actions at `../../.github/workflows`
* Pre-push suggestions:
* `black .`
* `black . tools/[a-z]*`
* `isort . tools/[a-z]*`
* `flake8 . tools/[a-z]*`
* `python -m pytest tests`
* PyPI:
* https://pypi.org/project/tiledbsc/
2 changes: 1 addition & 1 deletion apis/python/doc/tiledb_array.md
@@ -10,7 +10,7 @@
class TileDBArray(TileDBObject)
```

Wraps arrays from TileDB-Py by retaining a URI, verbose flag, etc.
Wraps arrays from TileDB-Py by retaining a URI and convenience methods.
Also serves as an abstraction layer to hide TileDB-specific details from the API, unless
requested.

1 change: 0 additions & 1 deletion apis/python/doc/tiledb_object.md
@@ -22,7 +22,6 @@ def __init__(uri: str,
name: str,
parent=None,
soma_options: Optional[SOMAOptions] = None,
verbose: Optional[bool] = True,
ctx: Optional[tiledb.Ctx] = None)
```

4 changes: 1 addition & 3 deletions apis/python/examples/soco-slice-query.py
@@ -36,9 +36,7 @@ def soco_query_and_store(

if os.path.exists(output_soma_path):
shutil.rmtree(output_soma_path)
soma = tiledbsc.SOMA.from_soma_slice(
result_soma_slice, output_soma_path, verbose=False
)
soma = tiledbsc.SOMA.from_soma_slice(result_soma_slice, output_soma_path)
print("Wrote", output_soma_path, soma.X.data.shape())


36 changes: 19 additions & 17 deletions apis/python/examples/uniformizer.py
@@ -37,6 +37,7 @@
import scipy.stats
import tiledb

import tiledbsc.logging
from tiledbsc import SOMA, SOMACollection
from tiledbsc import io as SOMAio

@@ -50,11 +51,17 @@ def main() -> int:
args = parser.parse_args()

if args.verbose:
# tiledbsc.logging.logger.setLevel(logging.INFO)
# logger = logging.getLogger('tiledbsc')
# logger.setLevel(logging.INFO)
# logging.getLogger("tiledbsc").setLevel(logging.INFO)
# Not able to get any of the above to 'stick'. The following sets level for the whole app,
# not just the tiledbsc library, but that's an acceptable workaround since this CLI does
# nothing except invoke the tiledbsc library.
logging.basicConfig(level=logging.INFO)

uniformizer = Uniformizer(
atlas_uri=args.atlas_uri,
verbose=args.verbose,
)
if args.allow_non_primary_data:
uniformizer._allow_non_primary_data = True
@@ -148,7 +155,6 @@ class Uniformizer:

ctx: tiledb.Ctx
atlas_uri: str
verbose: bool

# You can adapt these to match your organization's schema
OBS_COLUMNS = [
@@ -175,11 +181,9 @@
def __init__(
self,
atlas_uri: str,
verbose: bool = False,
):
self.ctx = self._create_tiledb_ctx()
self.atlas_uri = atlas_uri
self.verbose = verbose
self._allow_non_primary_data = False

# ----------------------------------------------------------------
Expand All @@ -200,7 +204,7 @@ def add_h5ad(self, input_dataset_id: Optional[str], input_h5ad_path) -> int:
if soma_name in soco:
raise Exception(f"SOMA {soma_name} is already in SOMACollection {soco.uri}")

logging.info("Loading H5AD")
tiledbsc.logging.logger.info("Loading H5AD")
ann = anndata.read_h5ad(input_h5ad_path)

self._clean_and_add(ann, soma_name, soco)
@@ -215,7 +219,7 @@ def add_soma(self, input_dataset_id: Optional[str], input_soma_uri: str) -> int:
if soma_name in soco:
raise Exception(f"SOMA {soma_name} is already in SOMACollection {soco.uri}")

logging.info("Loading SOMA")
tiledbsc.logging.logger.info("Loading SOMA")
input_soma = SOMA(input_soma_uri)
ann = SOMAio.to_anndata(input_soma)

@@ -227,9 +231,7 @@ def _init_soco(self) -> SOMACollection:
"""
Makes sure the destination SOMACollection exists for first write.
"""
soco = SOMACollection(
self.atlas_uri, name="atlas", ctx=self.ctx, verbose=self.verbose
)
soco = SOMACollection(self.atlas_uri, name="atlas", ctx=self.ctx)
soco.create_unless_exists() # Must be done first, to create the parent directory
if not soco.exists():
raise Exception(f"Could not create SOCO at {soco.uri}")
@@ -254,24 +256,24 @@ def _clean_and_add(
Cleans and uniformizes the data (whether obtained from H5AD or SOMA), writes a new SOMA, adds an
X/rankit layer, and adds the new SOMA to the SOMACollection.
"""
logging.info("Cleaning data")
tiledbsc.logging.logger.info("Cleaning data")
ann = self._clean_and_uniformize(ann)

logging.info("Creating rankit")
tiledbsc.logging.logger.info("Creating rankit")
X_rankit = _rankit(ann.X)

logging.info("Saving SOMA")
tiledbsc.logging.logger.info("Saving SOMA")
soma_uri = f"{self.atlas_uri}/{soma_name}"
atlas_soma = SOMA(
uri=soma_uri, name=soma_name, verbose=self.verbose, ctx=self.ctx
)
atlas_soma = SOMA(uri=soma_uri, name=soma_name, ctx=self.ctx)
SOMAio.from_anndata(atlas_soma, ann)

logging.info(f"Adding SOMA name {atlas_soma.name} at SOMA URI {atlas_soma.uri}")
tiledbsc.logging.logger.info(
f"Adding SOMA name {atlas_soma.name} at SOMA URI {atlas_soma.uri}"
)
soco.add(atlas_soma)

# Create rankit X layer and save
logging.info("Saving rankit layer")
tiledbsc.logging.logger.info("Saving rankit layer")
if "rankit" in atlas_soma.X.keys():
raise Exception(
f"rankit layer already exists in the SOMA {atlas_soma.name} {atlas_soma.uri}"
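The commented-out attempts in `main()` above hint at why per-library configuration appeared not to "stick": until some handler is attached, Python's last-resort handler drops everything below WARNING, so raising the `tiledbsc` logger's level alone produces no visible output. A minimal sketch of scoping INFO output to this one library, assuming it logs through a logger named `"tiledbsc"` and attaches no handlers of its own:

```python
import logging

# Install a root handler first; without one, records below WARNING are
# swallowed by logging's last-resort handler regardless of logger levels.
logging.basicConfig()

# Raise only the library's logger; other loggers keep inheriting the root
# default (WARNING), so the rest of the application stays quiet.
logging.getLogger("tiledbsc").setLevel(logging.INFO)
```

With a handler in place, the per-library level takes effect, though `logging.basicConfig(level=logging.INFO)` remains a reasonable shortcut for a CLI that does nothing but invoke the library.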
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsc/__init__.py
@@ -31,6 +31,8 @@
except DistributionNotFound:
__version__ = "unknown"

import logging

from .annotation_matrix import AnnotationMatrix
from .annotation_matrix_group import AnnotationMatrixGroup
from .annotation_pairwise_matrix_group import AnnotationPairwiseMatrixGroup
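The `from .logging import logger` imports elsewhere in this diff refer to a module whose body is not shown here. For orientation, a minimal sketch of what such a `tiledbsc/logging.py` typically contains; everything beyond the `logger` name is an assumption:

```python
import logging

# One shared logger for the whole package, named after it so applications
# can address it with logging.getLogger("tiledbsc").
logger = logging.getLogger("tiledbsc")

# Library convention: attach only a NullHandler, leaving handler and level
# configuration to the embedding application.
logger.addHandler(logging.NullHandler())
```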
12 changes: 5 additions & 7 deletions apis/python/src/tiledbsc/annotation_dataframe.py
@@ -6,6 +6,7 @@

import tiledbsc.util as util

from .logging import logger
from .tiledb_array import TileDBArray
from .tiledb_group import TileDBGroup

@@ -191,9 +192,8 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
dim_filters = tiledb.FilterList([tiledb.ZstdFilter(level=-1)])
attr_filters = tiledb.FilterList([tiledb.ZstdFilter(level=-1)])

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START WRITING {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START WRITING {self.uri}")

# Make the row-names column (barcodes for obs, gene names for var) explicitly named.
# Otherwise it'll be called '__tiledb_rows'.
@@ -223,8 +223,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
mode = "ingest"
if self.exists():
mode = "append"
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")

# ISSUE:
# TileDB attributes can be stored as Unicode but they are not yet queryable via the TileDB
@@ -277,5 +276,4 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:

self._set_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
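One side note on the pattern adopted throughout this diff: an f-string argument is built even when INFO logging is disabled, whereas the logging module's %-style arguments defer formatting until a handler actually emits the record, which can matter on hot paths. A small illustration with a hypothetical URI:

```python
import logging

logger = logging.getLogger("tiledbsc")
uri = "s3://bucket/soma/obs"  # hypothetical value for illustration

logger.info(f"START WRITING {uri}")   # string is built even if INFO is off
logger.info("START WRITING %s", uri)  # formatted only if a handler emits it
```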
15 changes: 6 additions & 9 deletions apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import tiledbsc.util as util

from .logging import logger
from .tiledb_array import TileDBArray
from .tiledb_group import TileDBGroup

@@ -95,9 +96,8 @@ def from_matrix_and_dim_values(self, matrix, dim_values) -> None:
:param dim_values: anndata.obs_names, anndata.var_names, or anndata.raw.var_names.
"""

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START WRITING {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START WRITING {self.uri}")

if isinstance(matrix, pd.DataFrame):
self._from_pandas_dataframe(matrix, dim_values)
@@ -106,8 +106,7 @@

self._set_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))

# ----------------------------------------------------------------
def _numpy_ndarray_or_scipy_sparse_csr_matrix(self, matrix, dim_values) -> None:
@@ -119,8 +118,7 @@

# Ingest annotation matrices as 1D/multi-attribute sparse arrays
if self.exists():
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")
else:
self._create_empty_array([matrix.dtype] * nattr, attr_names)

@@ -133,8 +131,7 @@ def _from_pandas_dataframe(self, df, dim_values) -> None:

# Ingest annotation matrices as 1D/multi-attribute sparse arrays
if self.exists():
if self._verbose:
print(f"{self._indent}Re-using existing array {self.uri}")
logger.info(f"{self._indent}Re-using existing array {self.uri}")
else:
self._create_empty_array(list(df.dtypes), attr_names)

26 changes: 10 additions & 16 deletions apis/python/src/tiledbsc/annotation_matrix_group.py
@@ -8,6 +8,7 @@
import tiledbsc.util as util

from .annotation_matrix import AnnotationMatrix
from .logging import logger
from .tiledb_group import TileDBGroup


@@ -105,36 +106,29 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]:
if (
not self.exists()
): # Not all groups have all four of obsm, obsp, varm, and varp.
if self._verbose:
print(f"{self._indent}{self.uri} not found")
logger.info(f"{self._indent}{self.uri} not found")
return {}

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START read {self.uri}")

with self._open() as G:
matrices_in_group = {}
for element in G:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")
s2 = util.get_start_stamp()
logger.info(f"{self._indent}START read {element.uri}")

with tiledb.open(element.uri, ctx=self._ctx) as A:
df = pd.DataFrame(A[:])
df.set_index(self.dim_name, inplace=True)
matrix_name = os.path.basename(element.uri) # e.g. 'X_pca'
matrices_in_group[matrix_name] = df.to_numpy()

if self._verbose:
print(
util.format_elapsed(
s2, f"{self._indent}FINISH read {element.uri}"
)
)
logger.info(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))

return matrices_in_group

24 changes: 10 additions & 14 deletions apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py
@@ -8,6 +8,7 @@

from .annotation_dataframe import AnnotationDataFrame
from .assay_matrix import AssayMatrix
from .logging import logger
from .tiledb_group import TileDBGroup


@@ -137,34 +138,29 @@ def to_dict_of_csr(
except tiledb.TileDBError:
pass
if grp is None:
if self._verbose:
print(f"{self._indent}{self.uri} not found")
logger.info(f"{self._indent}{self.uri} not found")
return {}

if self._verbose:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")
s = util.get_start_stamp()
logger.info(f"{self._indent}START read {self.uri}")

matrices_in_group = {}
for element in self:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")
s2 = util.get_start_stamp()
logger.info(f"{self._indent}START read {element.uri}")

matrix_name = os.path.basename(element.uri) # TODO: fix for tiledb cloud
matrices_in_group[matrix_name] = element.to_csr_matrix(
obs_df_index, var_df_index
)

if self._verbose:
print(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)
logger.info(
util.format_elapsed(s2, f"{self._indent}FINISH read {element.uri}")
)

grp.close()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
logger.info(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))

return matrices_in_group
