Skip to content

Commit

Permalink
pylint
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mittmich committed Oct 7, 2023
1 parent 39bcc93 commit 8968c15
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[MASTER]
extension-pkg-whitelist=pydantic # This is needed since pydantic has binary extensions
1 change: 0 additions & 1 deletion spoc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from spoc.fragments import FragmentAnnotator, FragmentExpander
from spoc.io import FileManager
from spoc.pixels import GenomicBinner
from spoc.file_parameter_models import ContactsParameters


@click.group()
Expand Down
32 changes: 16 additions & 16 deletions spoc/contacts.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
"""Managing multi-way contacts."""

from __future__ import annotations # needed for self reference in type hints
from typing import List, Union, Optional
from itertools import permutations, product
from typing import List, Optional, Dict
import pandas as pd
import dask.dataframe as dd
from typing import Union, Optional, Dict
from itertools import permutations, product
import numpy as np
from spoc.dataframe_models import ContactSchema, DataFrame
from spoc.file_parameter_models import ContactsParameters
import numpy as np


class Contacts:
Expand Down Expand Up @@ -84,10 +83,12 @@ def get_chromosome_values(self) -> List[str]:

@property
def data(self):
"""Returns the contact data"""
return self._data

@data.setter
def data(self, contact_frame):
"""Sets the contact data"""
self._data = self._schema.validate(contact_frame)

def __repr__(self) -> str:
Expand All @@ -101,9 +102,9 @@ class ContactManipulator:
def merge_contacts(self, merge_list: List[Contacts]) -> Contacts:
"""Merge contacts"""
# validate that merge is possible
if len(set([i.number_fragments for i in merge_list])) != 1:
if len({i.number_fragments for i in merge_list}) != 1:
raise ValueError("All contacts need to have the same order!")
if len(set([i.is_dask for i in merge_list])) != 1:
if len({i.is_dask for i in merge_list}) != 1:
raise ValueError("Mixture of dask and pandas dataframes is not supported!")
# TODO: assert all have same labelling state
number_fragments = merge_list[0].number_fragments
Expand Down Expand Up @@ -386,7 +387,7 @@ def subset_on_metadata(
label_values = contacts.get_label_values()
# check if metadata_combi is compatible with label values
assert all(
[i in label_values for i in metadata_combi]
i in label_values for i in metadata_combi
), "Metadata combination is not compatible with label values!"
# subset contacts
query = " and ".join(
Expand Down Expand Up @@ -420,12 +421,11 @@ def flip_symmetric_contacts(
binary_labels_equal=contacts.binary_labels_equal,
symmetry_flipped=True,
)
else:
result = self._flip_unlabelled_contacts(contacts.data)
if sort_chromosomes:
result = self._sort_chromosomes(result, contacts.number_fragments)
return Contacts(
result,
number_fragments=contacts.number_fragments,
symmetry_flipped=True,
)
result = self._flip_unlabelled_contacts(contacts.data)
if sort_chromosomes:
result = self._sort_chromosomes(result, contacts.number_fragments)
return Contacts(
result,
number_fragments=contacts.number_fragments,
symmetry_flipped=True,
)
28 changes: 13 additions & 15 deletions spoc/dataframe_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@


class ContactSchema:
"""Dynamic schema for n-way contacts"""
"""Dynamic schema for N-way contacts"""

# field groups

Expand Down Expand Up @@ -69,14 +69,14 @@ def __init__(

@classmethod
def get_contact_fields(cls, contains_metadata: bool) -> Dict:
"""returns contact fields"""
if contains_metadata:
return copy.deepcopy(cls.contact_fields)
else:
return {
key: value
for key, value in copy.deepcopy(cls.contact_fields).items()
if key not in ["metadata"]
}
return {
key: value
for key, value in copy.deepcopy(cls.contact_fields).items()
if key not in ["metadata"]
}

def _expand_contact_fields(
self, expansions: Iterable = (1, 2, 3), contains_metadata: bool = True
Expand Down Expand Up @@ -104,7 +104,7 @@ def validate(self, data_frame: DataFrame) -> DataFrame:


class PixelSchema:
"""Dynamic schema for n-way pixels"""
"""Dynamic schema for N-way pixels"""

def __init__(self, number_fragments: int = 3, same_chromosome: bool = True) -> None:
self._number_fragments = number_fragments
Expand All @@ -122,8 +122,7 @@ def _get_contact_fields(self):
return {
"start": pa.Column(int),
}
else:
return {"chrom": pa.Column(str), "start": pa.Column(int)}
return {"chrom": pa.Column(str), "start": pa.Column(int)}

def _get_constant_fields(self):
if self._same_chromosome:
Expand All @@ -132,11 +131,10 @@ def _get_constant_fields(self):
"count": pa.Column(int),
"corrected_count": pa.Column(float, required=False),
}
else:
return {
"count": pa.Column(int),
"corrected_count": pa.Column(float, required=False),
}
return {
"count": pa.Column(int),
"corrected_count": pa.Column(float, required=False),
}

def _expand_contact_fields(self, expansions: Iterable = (1, 2, 3)) -> dict:
"""adds suffixes to fields"""
Expand Down
2 changes: 1 addition & 1 deletion spoc/file_parameter_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This file contains data classes for parameters
of spoc data structures"""
from pydantic import BaseModel
from typing import Optional, List
from pydantic import BaseModel


class ContactsParameters(BaseModel):
Expand Down
11 changes: 6 additions & 5 deletions spoc/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
as well as expanding fragments to contacts."""

from typing import Dict, Union
from itertools import combinations
import pandas as pd
import dask.dataframe as dd
import numpy as np
from itertools import combinations
from .dataframe_models import FragmentSchema, ContactSchema, DataFrame
from .contacts import Contacts

Expand All @@ -16,20 +16,21 @@ class Fragments:

def __init__(self, fragment_frame: DataFrame) -> None:
self._data = FragmentSchema.validate(fragment_frame)
self._contains_metadata = (
True if "metadata" in fragment_frame.columns else False
)
self._contains_metadata = "metadata" in fragment_frame.columns

@property
def data(self):
"""Returns the underlying dataframe"""
return self._data

@property
def contains_metadata(self):
"""Returns whether the dataframe contains metadata"""
return self._contains_metadata

@property
def is_dask(self):
"""Returns whether the underlying dataframe is dask"""
return isinstance(self._data, dd.DataFrame)


Expand Down Expand Up @@ -131,7 +132,7 @@ def expand(self, fragments: Fragments) -> Contacts:
if fragments.is_dask:
kwargs = dict(meta=self._get_expansion_output_structure())
else:
kwargs = dict()
kwargs = {}
# expand
contact_df = (
fragments.data.groupby(["read_name", "read_length"])
Expand Down
5 changes: 2 additions & 3 deletions spoc/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pydantic import BaseModel
from spoc.contacts import Contacts
from spoc.pixels import Pixels
from spoc.dataframe_models import FragmentSchema, ContactSchema, DataFrame
from spoc.file_parameter_models import ContactsParameters, PixelParameters
from spoc.fragments import Fragments

Expand Down Expand Up @@ -130,7 +129,7 @@ def _load_pixel_metadata(path: str):
"""Load metadata"""
metadata_path = Path(path) / "metadata.json"
if metadata_path.exists():
with open(metadata_path, "r") as f:
with open(metadata_path, "r", encoding='UTF-8') as f:
metadata = json.load(f)
else:
raise ValueError(f"Metadata file not found at {metadata_path}")
Expand Down Expand Up @@ -193,5 +192,5 @@ def write_pixels(self, path: str, pixels: Pixels) -> None:
pixels.data.to_parquet(write_path, row_group_size=1024 * 1024)
# write metadata
current_metadata[write_path.name] = pixels.get_global_parameters().dict()
with open(metadata_path, "w") as f:
with open(metadata_path, "w", encoding='UTF-8') as f:
json.dump(current_metadata, f)
32 changes: 19 additions & 13 deletions spoc/pixels.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""This part of spoc is responsible for binned,
higher order contacts in the form of 'genomic pixels'"""
from pathlib import Path
from typing import Union, Optional, List
import pandas as pd
import dask.dataframe as dd
import bioframe as bf
import pyranges as pr
from typing import Union, Optional, List
from spoc.dataframe_models import ContactSchema, PixelSchema
from spoc.dataframe_models import PixelSchema
from spoc.file_parameter_models import PixelParameters
from spoc.contacts import Contacts

Expand Down Expand Up @@ -50,9 +48,7 @@ def __init__(
self._symmetry_flipped = symmetry_flipped
self._metadata_combi = metadata_combi
self._label_sorted = label_sorted
if isinstance(pixel_source, pd.DataFrame) or isinstance(
pixel_source, dd.DataFrame
):
if isinstance(pixel_source, (pd.DataFrame, dd.DataFrame)):
self._data = self._schema.validate(pixel_source)
self._path = None
else:
Expand All @@ -62,6 +58,7 @@ def __init__(
self._path = Path(pixel_source)
self._data = None

@staticmethod
def from_uri(uri, mode="path"):
"""Construct pixels from uri.
Will match parameters based on the following order:
Expand All @@ -74,10 +71,11 @@ def from_uri(uri, mode="path"):
Mode can be one of pandas|dask|path, which corresponds to the type of the pixel source.
"""
# import here to avoid circular imports
#pylint: disable=import-outside-toplevel
from spoc.io import FileManager

# Define uir parameters
PARAMETERS = [
# Define uri parameters
uri_parameters = [
"number_fragments",
"binsize",
"metadata_combi",
Expand All @@ -93,7 +91,7 @@ def from_uri(uri, mode="path"):
raise ValueError(
f"Uri: {uri} is not valid. Must contain at least Path, number_fragments and binsize"
)
params = {key: value for key, value in zip(PARAMETERS, uri[1:])}
params = dict(zip(uri_parameters, uri[1:]))
# rewrite metadata_combi parameter
if "metadata_combi" in params.keys() and params["metadata_combi"] != "None":
params["metadata_combi"] = str(list(params["metadata_combi"]))
Expand All @@ -102,7 +100,7 @@ def from_uri(uri, mode="path"):
load_dataframe = False
use_dask = False
elif mode == "pandas":
laod_dataframe = True
load_dataframe = True
use_dask = False
else:
load_dataframe = True
Expand All @@ -113,12 +111,12 @@ def from_uri(uri, mode="path"):
matched_pixels = [
pixel
for pixel in available_pixels
if all(params[key] == str(pixel.dict()[key]) for key in params.keys())
if all(value == str(pixel.dict()[key]) for key, value in params.items())
]
# check whether there is a unique match
if len(matched_pixels) == 0:
raise ValueError(f"No pixels found for uri: {uri}")
elif len(matched_pixels) > 1:
if len(matched_pixels) > 1:
raise ValueError(f"Multiple pixels found for uri: {uri}")
return FileManager(use_dask=use_dask).load_pixels(
uri[0], matched_pixels[0], load_dataframe=load_dataframe
Expand All @@ -138,34 +136,42 @@ def get_global_parameters(self):

@property
def path(self):
"""Returns path of pixels"""
return self._path

@property
def data(self):
"""Returns pixels as dataframe"""
return self._data

@property
def number_fragments(self):
"""Returns number of fragments in pixels"""
return self._number_fragments

@property
def binsize(self):
"""Returns binsize of pixels"""
return self._binsize

@property
def binary_labels_equal(self):
"""Returns whether binary labels are equal"""
return self._binary_labels_equal

@property
def symmetry_flipped(self):
"""Returns whether pixels are symmetry flipped"""
return self._symmetry_flipped

@property
def metadata_combi(self):
"""Returns metadata combination of pixels"""
return self._metadata_combi

@property
def same_chromosome(self):
"""Returns whether pixels are on same chromosome"""
return self._same_chromosome


Expand Down

0 comments on commit 8968c15

Please sign in to comment.