added google style docstrings

gerlichlab · Oct 7, 2023 · 26d1f2a · 26d1f2a
1 parent 1f8eaab
commit 26d1f2a
Show file tree

Hide file tree

Showing 6 changed files with 391 additions and 63 deletions.
diff --git a/spoc/cli.py b/spoc/cli.py
@@ -22,7 +22,15 @@ def main():
     help="Number of fragments per read to expand",
 )
 def expand(fragments_path, expanded_contacts_path, n_fragments):
-    """Script for expanding labelled fragments to contacts"""
+    """
+    Script for expanding labelled fragments to contacts
+
+    Args:
+        fragments_path (str): Path to the labelled fragments file.
+        expanded_contacts_path (str): Path to the output contacts file.
+        n_fragments (int, optional): Number of fragments per read to expand. Defaults to 3.
+    
+    """
     expander = FragmentExpander(number_fragments=n_fragments)
     file_manager = FileManager()
     input_fragments = file_manager.load_fragments(fragments_path)
@@ -35,7 +43,14 @@ def expand(fragments_path, expanded_contacts_path, n_fragments):
 @click.argument("label_library_path")
 @click.argument("labelled_fragments_path")
 def annotate(fragments_path, label_library_path, labelled_fragments_path):
-    """Script for annotating porec fragments"""
+    """Script for annotating porec fragments
+
+    Args:
+        fragments_path (str): Path to the input fragments file.
+        label_library_path (str): Path to the label library file.
+        labelled_fragments_path (str): Path to the output labelled fragments file.
+    
+    """
     file_manager = FileManager()
     label_library = file_manager.load_label_library(label_library_path)
     annotator = FragmentAnnotator(label_library)
@@ -55,7 +70,15 @@ def bin_contacts(
     bin_size,
     same_chromosome,
 ):
-    """Script for binning contacts"""
+    """Script for binning contacts
+    
+    Args:
+        contact_path (str): Path to the input contact file.
+        pixel_path (str): Path to the output pixel file.
+        bin_size (int, optional): Size of the bins. Defaults to 10000.
+        same_chromosome (bool, optional): Only bin contacts on the same chromosome. Defaults to False.
+    
+    """
     # load data from disk
     file_manager = FileManager(use_dask=True)
     contacts = file_manager.load_contacts(contact_path)
@@ -75,7 +98,12 @@ def merge():
 @click.argument("contact_paths", nargs=-1)
 @click.option("-o", "--output", help="output path")
 def merge_contacts(contact_paths, output):
-    """Functionality to merge annotated fragments"""
+    """Functionality to merge annotated fragments
+    
+    Args:
+        contact_paths (tuple): Paths to the input contact files.
+        output (str, optional): Path to the output merged contact file.
+    """
     file_manager = FileManager(use_dask=True)
     manipulator = ContactManipulator()
     contact_files = [file_manager.load_contacts(path) for path in contact_paths]

diff --git a/spoc/contacts.py b/spoc/contacts.py
@@ -11,7 +11,25 @@
 
 
 class Contacts:
-    """N-way genomic contacts"""
+    """N-way genomic contacts
+    
+    Args:
+        contact_frame (DataFrame): DataFrame containing the contact data.
+        number_fragments (int, optional): Number of fragments. Defaults to None.
+        metadata_combi (List[str], optional): List of metadata combinations. Defaults to None.
+        label_sorted (bool, optional): Whether the labels are sorted. Defaults to False.
+        binary_labels_equal (bool, optional): Whether the binary labels are equal. Defaults to False.
+        symmetry_flipped (bool, optional): Whether the symmetry is flipped. Defaults to False.
+    
+    Attributes:
+        contains_metadata (bool): Whether the contact data contains metadata.
+        number_fragments (int): Number of fragments.
+        is_dask (bool): Whether the contact data is a Dask DataFrame.
+        metadata_combi (List[str]): List of metadata combinations.
+        label_sorted (bool): Whether the labels are sorted.
+        binary_labels_equal (bool): Whether the binary labels are equal.
+        symmetry_flipped (bool): Whether the symmetry is flipped.
+    """
 
     def __init__(
         self,
@@ -88,7 +106,11 @@ def data(self):
 
     @data.setter
     def data(self, contact_frame):
-        """Sets the contact data"""
+        """Sets the contact data
+        
+        Args:
+            contact_frame (DataFrame): DataFrame containing the contact data.
+        """
         self._data = self._schema.validate(contact_frame)
 
     def __repr__(self) -> str:
@@ -100,7 +122,14 @@ class ContactManipulator:
     contact data such as merging, splitting and subsetting."""
 
     def merge_contacts(self, merge_list: List[Contacts]) -> Contacts:
-        """Merge contacts"""
+        """Merge contacts
+        
+        Args:
+            merge_list (List[Contacts]): List of Contacts objects to merge.
+
+        Returns:
+            Contacts: Merged Contacts object.
+        """
         # validate that merge is possible
         if len({i.number_fragments for i in merge_list}) != 1:
             raise ValueError("All contacts need to have the same order!")
@@ -244,7 +273,14 @@ def _flip_labelled_contacts(
         return result
 
     def sort_labels(self, contacts: Contacts) -> Contacts:
-        """Sorts labels in ascending, alphabetical order"""
+        """Sorts labels in ascending, alphabetical order
+        
+        Args:
+            contacts (Contacts): Contacts object to sort.
+
+        Returns:
+            Contacts: Sorted Contacts object.
+        """
         if not contacts.contains_metadata:
             raise ValueError(
                 "Sorting labels for unlabelled contacts is not implemented."
@@ -329,12 +365,22 @@ def _generate_binary_label_mapping(
         return mapping
 
     def equate_binary_labels(self, contacts: Contacts) -> Contacts:
-        """Binary labels often only carry information about whether
+        """
+        Equate binary labels.
+        
+        Binary labels often only carry information about whether
         they happen between the same or different fragments. This
         method equates these labels be replacing all equivalent binary labels with
         the alphabetically first label.
         For example, if we have a contact between two fragments
         that are labelled B and B, the label will be replaced with AA.
+
+        Args:
+            contacts (Contacts): Contacts object whose binary labels should be equated.
+
+        Returns:
+            Contacts: Contacts object with equated binary labels.
+
         """
         assert contacts.contains_metadata, "Contacts do not contain metadata!"
         if not contacts.label_sorted:
@@ -376,7 +422,16 @@ def equate_binary_labels(self, contacts: Contacts) -> Contacts:
     def subset_on_metadata(
         self, contacts: Contacts, metadata_combi: List[str]
     ) -> Contacts:
-        """Subset contacts based on metadata"""
+        """Subset contacts based on metadata
+        
+        Args:
+            contacts (Contacts): Contacts object to subset.
+            metadata_combi (List[str]): List of metadata combinations to subset on.
+
+        Returns:
+            Contacts: Subsetted Contacts object.
+        
+        """
         # check if metadata is present
         assert contacts.contains_metadata, "Contacts do not contain metadata!"
         # check if metadata_combi has the correct length
@@ -406,7 +461,16 @@ def subset_on_metadata(
     def flip_symmetric_contacts(
         self, contacts: Contacts, sort_chromosomes: bool = False
     ) -> Contacts:
-        """Flips contacts based on inherent symmetry"""
+        """Flips contacts based on inherent symmetry
+        
+        Args:
+            contacts (Contacts): Contacts object whose symmetric contacts should be flipped.
+            sort_chromosomes (bool, optional): Whether to sort chromosomes. Defaults to False.
+
+        Returns:
+            Contacts: Contacts object with flipped symmetric contacts.
+        
+        """
         if contacts.contains_metadata:
             if not contacts.label_sorted:
                 contacts = self.sort_labels(contacts)

diff --git a/spoc/dataframe_models.py b/spoc/dataframe_models.py
@@ -34,7 +34,12 @@
 
 
 class ContactSchema:
-    """Dynamic schema for N-way contacts"""
+    """Dynamic schema for N-way contacts
+    
+    Args:
+        number_fragments (int, optional): Number of fragments. Defaults to 3.
+        contains_metadata (bool, optional): Whether the contact data contains metadata. Defaults to True.
+    """
 
     # field groups
 
@@ -69,7 +74,14 @@ def __init__(
 
     @classmethod
     def get_contact_fields(cls, contains_metadata: bool) -> Dict:
-        """returns contact fields"""
+        """Returns contact fields
+
+        Args:
+            contains_metadata (bool): Whether the contact data contains metadata.
+
+        Returns:
+            Dict: Dictionary containing the contact fields.
+        """
         if contains_metadata:
             return copy.deepcopy(cls.contact_fields)
         return {
@@ -90,21 +102,34 @@ def _expand_contact_fields(
 
     def validate_header(self, data_frame: DataFrame) -> None:
         """Validates only header, needed to validate that dask taskgraph can be built before
-        evaluation"""
+        evaluation.
+        
+        Args:
+            data_frame (DataFrame): The DataFrame to validate.
+        """
         for column in data_frame.columns:
             if column not in self._schema.columns:
                 raise pa.errors.SchemaError(
                     self._schema, data_frame, "Header is invalid!"
                 )
 
     def validate(self, data_frame: DataFrame) -> DataFrame:
-        """Validate multiway contact dataframe"""
+        """Validate multiway contact dataframe
+        
+        Args:
+            data_frame (DataFrame): The DataFrame to validate.
+        """
         self.validate_header(data_frame)
         return self._schema.validate(data_frame)
 
 
 class PixelSchema:
-    """Dynamic schema for N-way pixels"""
+    """Dynamic schema for N-way pixels
+    
+    Args:
+        number_fragments (int, optional): Number of fragments. Defaults to 3.
+        same_chromosome (bool, optional): Whether the fragments are on the same chromosome. Defaults to True.
+    """
 
     def __init__(self, number_fragments: int = 3, same_chromosome: bool = True) -> None:
         self._number_fragments = number_fragments
@@ -146,13 +171,22 @@ def _expand_contact_fields(self, expansions: Iterable = (1, 2, 3)) -> dict:
 
     def validate_header(self, data_frame: DataFrame) -> None:
         """Validates only header, needed to validate that dask taskgraph can be built before
-        evaluation"""
+        evaluation.
+        
+        Args:
+            data_frame (DataFrame): The DataFrame to validate.
+        """
         for column in data_frame.columns:
             if column not in self._schema.columns:
                 raise pa.errors.SchemaError(
                     self._schema, data_frame, "Header is invalid!"
                 )
 
     def validate(self, data_frame: DataFrame) -> DataFrame:
-        """Validate multiway contact dataframe"""
+        """Validate multiway pixel dataframe
+        
+        Args:
+            data_frame (DataFrame): The DataFrame to validate.
+        
+        """
         return self._schema.validate(data_frame)
diff --git a/spoc/fragments.py b/spoc/fragments.py
@@ -12,31 +12,51 @@
 
 
 class Fragments:
-    """Genomic fragments that can be labelled or not"""
+    """Genomic fragments that can be labelled or not.
+    
+    Args:
+        fragment_frame (DataFrame): DataFrame containing the fragment data.
+    """
 
     def __init__(self, fragment_frame: DataFrame) -> None:
         self._data = FragmentSchema.validate(fragment_frame)
         self._contains_metadata = "metadata" in fragment_frame.columns
 
     @property
-    def data(self):
-        """Returns the underlying dataframe"""
+    def data(self) -> DataFrame:
+        """Returns the underlying dataframe.
+        
+        Returns:
+            DataFrame: Fragment data.
+        """
         return self._data
 
     @property
-    def contains_metadata(self):
-        """Returns whether the dataframe contains metadata"""
+    def contains_metadata(self) -> bool:
+        """Returns whether the dataframe contains metadata.
+        
+        Returns:
+            bool: Whether the fragment data contains metadata.
+        """
         return self._contains_metadata
 
     @property
-    def is_dask(self):
-        """Returns whether the underlying dataframe is dask"""
+    def is_dask(self) -> bool:
+        """Returns whether the underlying dataframe is dask.
+        
+        Returns:
+            bool: Whether the underlying dataframe is a dask dataframe.
+        """
         return isinstance(self._data, dd.DataFrame)
 
 
 # TODO: make generic such that label library can hold arbitrary information
 class FragmentAnnotator:
-    """Responsible for annotating labels and sister identity of mapped read fragments"""
+    """Responsible for annotating labels and sister identity of mapped read fragments.
+    
+    Args:
+        label_library (Dict[str, bool]): Dictionary containing the label library.
+    """
 
     def __init__(self, label_library: Dict[str, bool]) -> None:
         self._label_library = label_library
@@ -72,7 +92,15 @@ def _assign_label_state(self, data_frame: pd.DataFrame) -> pd.Series:
     def annotate_fragments(self, fragments: Fragments) -> Fragments:
         """Takes fragment dataframe and returns a copy of it with its labelling state in a separate
         column with name `is_labelled`. If drop_uninformative is true, drops fragments that
-        are not in label library."""
+        are not in label library.
+        
+        Args:
+            fragments (Fragments): Fragments object containing the fragment data.
+
+        Returns:
+            Fragments: Fragments object with annotated fragment data.
+        
+        """
         return Fragments(
             fragments.data.assign(is_labelled=self._assign_label_state)
             .dropna(subset=["is_labelled"])
@@ -83,7 +111,13 @@ def annotate_fragments(self, fragments: Fragments) -> Fragments:
 
 class FragmentExpander:
     """Expands n-way fragments over sequencing reads
-    to yield contacts."""
+    to yield contacts.
+    
+    Args:
+        number_fragments (int): Number of fragments.
+        contains_metadata (bool, optional): Whether the fragment data contains metadata. Defaults to True.
+    
+    """
 
     def __init__(self, number_fragments: int, contains_metadata: bool = True) -> None:
         self._number_fragments = number_fragments
@@ -127,7 +161,14 @@ def _expand_single_read(
         return pd.DataFrame(result)
 
     def expand(self, fragments: Fragments) -> Contacts:
-        """expand contacts n-ways"""
+        """Expand contacts n-ways
+        
+        Args:
+            fragments (Fragments): Fragments object containing the fragment data.
+
+        Returns:
+            Contacts: Contacts object containing the expanded contact data.
+        """
         # construct dataframe type specific kwargs
         if fragments.is_dask:
             kwargs = dict(meta=self._get_expansion_output_structure())