From 31d280aa1f064d875c565bfbdc01d37278ffe7fe Mon Sep 17 00:00:00 2001 From: jkanche Date: Sat, 28 Oct 2023 21:51:02 -0700 Subject: [PATCH 01/10] change to functional style for getters and setters --- src/biocframe/BiocFrame.py | 211 ++++++++++++++++++++++++++++++++----- 1 file changed, 186 insertions(+), 25 deletions(-) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index 4c93042..d836d32 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -285,6 +285,19 @@ def __str__(self) -> str: return capture.get() + def _setter_copy(self, in_place: bool = False) -> "BiocFrame": + if in_place: + return self + else: + return type(self)( + self._data, + self._number_of_rows, + self._row_names, + self._column_names, + self._mcols, + self._metadata, + ) + @property def shape(self) -> Tuple[int, int]: """Dimensionality of the BiocFrame. @@ -295,6 +308,45 @@ def shape(self) -> Tuple[int, int]: """ return (self._number_of_rows, len(self._column_names)) + def get_row_names(self) -> Optional[List]: + """Row names of the BiocFrame. + + Returns: + (List, optional): Row names if available, otherwise None. + """ + return self._row_names + + def set_row_names( + self, names: Optional[list], in_place: bool = False + ) -> "BiocFrame": + """Set new row names. All values in ``names`` must be unique. + + Args: + names (List[str], optional): A list of unique values. + in_place (bool): Whether to modify the ``BiocFrame`` object in place. + + Raises: + ValueError: If the length of ``names`` does not match the number of rows. + ValueError: If ``names`` is not unique. + + Returns: + A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. + """ + if names is not None: + if len(names) != self.shape[0]: + raise ValueError( + "Length of `names` does not match the number of rows, need to be " + f"{self.shape[0]} but provided {len(names)}." 
+ ) + + if not validate_unique_list(names): + warn("row names are not unique!") + + output = self._setter_copy(in_place) + output._row_names = names + return output + @property def row_names(self) -> Optional[List]: """Row names of the BiocFrame. @@ -302,6 +354,10 @@ def row_names(self) -> Optional[List]: Returns: (List, optional): Row names if available, otherwise None. """ + warn( + "'row_names' is deprecated, use 'get_row_names' instead", + DeprecationWarning, + ) return self._row_names @row_names.setter @@ -316,6 +372,11 @@ def row_names(self, names: Optional[List]): ValueError: If ``names`` is not unique. """ + warn( + "Setting property 'row_names' is deprecated, use 'set_row_names' instead", + DeprecationWarning, + ) + if names is not None: if len(names) != self.shape[0]: raise ValueError( @@ -330,13 +391,56 @@ def row_names(self, names: Optional[List]): @property def data(self) -> Dict[str, Any]: - """Access columns as :py:class:`dict`. + """Access columns as :py:class:`dict` Read-only property. Returns: Dict[str, Any]: Dictionary of columns and their values. """ return self._data + def get_column_names(self) -> List[str]: + """Column names of the BiocFrame. + + Returns: + List[str]: A list of column names. + """ + return self._column_names + + def set_column_names(self, names: List[str], in_place: bool = False) -> "BiocFrame": + """Set new column names. New names must be unique. + + Args: + names (List[str]): A list of unique values. + in_place (bool): Whether to modify the ``BiocFrame`` object in place. + + Raises: + ValueError: + If the length of ``names`` does not match the number of columns. + If ``names`` is not unique. + + Returns: + A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. 
+ """ + + if names is None: + raise ValueError("column names cannot be None!") + + if len(names) != len(self._column_names): + raise ValueError("Provided `names` does not match number of columns.") + + if not (validate_unique_list(names)): + raise ValueError("Column names must be unique!") + + new_data = OrderedDict() + for idx in range(len(names)): + new_data[names[idx]] = self._data[self.column_names[idx]] + + output = self._setter_copy(in_place) + output._column_names = names + output._data = new_data + return output + @property def column_names(self) -> List[str]: """Column names of the BiocFrame. @@ -344,6 +448,10 @@ def column_names(self) -> List[str]: Returns: List[str]: A list of column names. """ + warn( + "'column_names' is deprecated, use 'get_column_names' instead", + DeprecationWarning, + ) return self._column_names @column_names.setter @@ -359,6 +467,11 @@ def column_names(self, names: List[str]): If ``names`` is not unique. """ + warn( + "Setting property 'column_names' is deprecated, use 'set_column_names' instead", + DeprecationWarning, + ) + if names is None: raise ValueError("`names` cannot be `None`!") @@ -378,16 +491,48 @@ def column_names(self, names: List[str]): self._column_names = names self._data = new_data + def get_mcols(self) -> Union[None, "BiocFrame"]: + """The ``mcols``, containing annotation on the columns.""" + return self._mcols + + def set_mcols( + self, mcols: Union[None, "BiocFrame"], in_place: bool = False + ) -> "BiocFrame": + """Set new `mcols`, containing annotations on the columns. + + Args: + mcols (Biocframe, optional): New mcols. Can be `None` to remove this information. + in_place (bool): Whether to modify the ``BiocFrame`` object in place. + + Returns: + A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. 
+ """ + if mcols is not None: + if mcols.shape[0] != self.shape[1]: + raise ValueError( + "Number of rows in `mcols` should be equal to the number of columns." + ) + + output = self._setter_copy(in_place) + output._mcols = mcols + return output + @property def mcols(self) -> Union[None, "BiocFrame"]: - """ - Returns: The ``mcols``, containing annotation on the columns. - """ - # TODO: need to attach row names. + """The ``mcols``, containing annotation on the columns.""" + warn( + "'mcols' is deprecated, use 'get_mcols' instead", + DeprecationWarning, + ) return self._mcols @mcols.setter def mcols(self, mcols: Union[None, "BiocFrame"]): + warn( + "Setting property 'mcols' is deprecated, use 'set_mcols' instead", + DeprecationWarning, + ) if mcols is not None: if mcols.shape[0] != self.shape[1]: raise ValueError( @@ -395,6 +540,34 @@ def mcols(self, mcols: Union[None, "BiocFrame"]): ) self._mcols = mcols + def get_metadata(self) -> dict: + """Access metadata. + + Returns: + dict: Metadata object. + """ + return self._metadata + + def set_metadata(self, metadata: dict, in_place: bool = False) -> "BiocFrame": + """Set new metadata. + + Args: + metadata (dict): New metadata object. + in_place (bool): Whether to modify the ``BiocFrame`` object in place. + + Returns: + A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. + """ + if not isinstance(metadata, dict): + raise TypeError( + f"`metadata` must be a dictionary, provided {type(metadata)}." + ) + + output = self._setter_copy(in_place) + output._metadata = metadata + return output + @property def metadata(self) -> dict: """Access metadata. @@ -402,6 +575,10 @@ def metadata(self) -> dict: Returns: dict: Metadata object. 
""" + warn( + "'metadata' is deprecated, use 'get_metadata' instead", + DeprecationWarning, + ) return self._metadata @metadata.setter @@ -411,6 +588,10 @@ def metadata(self, metadata: dict): Args: metadata (dict): New metadata object. """ + warn( + "Setting property 'metadata' is deprecated, use 'set_metadata' instead", + DeprecationWarning, + ) if not isinstance(metadata, dict): raise TypeError( f"`metadata` must be a dictionary, provided {type(metadata)}." @@ -570,7 +751,6 @@ def _slice( mcols=mcols, ) - # TODO: implement in-place or views def __getitem__( self, args: SlicerArgTypes, @@ -683,7 +863,6 @@ def __getitem__( raise TypeError("Provided slice arguments are not supported!") - # TODO: implement in-place or views def __setitem__(self, name: str, value: List): """Add or re-assign a value to a column. @@ -859,15 +1038,6 @@ def rownames(self) -> Optional[list]: """ return self.row_names - @rownames.setter - def rownames(self, names: list): - """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names`. - - Args: - names (list): New row names. - """ - self.row_names = names - @property def colnames(self) -> list: """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names`. @@ -877,15 +1047,6 @@ def colnames(self) -> list: """ return self.column_names - @colnames.setter - def colnames(self, names: list): - """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names`. - - Args: - names (list): New column names. - """ - self.column_names = names - @property def dims(self) -> Tuple[int, int]: """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.shape`. 
From 2c42be1d3c3727000b9433ad210fb184bef3b211 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Oct 2023 14:48:01 -0700 Subject: [PATCH 02/10] continue with the rest of the functional changes --- src/biocframe/BiocFrame.py | 260 +++++++++++++++++++++---------------- 1 file changed, 151 insertions(+), 109 deletions(-) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index 56dc28a..570cd1b 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -285,18 +285,11 @@ def __str__(self) -> str: return capture.get() - def _setter_copy(self, in_place: bool = False) -> "BiocFrame": - if in_place: + def _define_output(self, in_place: bool = False) -> "BiocFrame": + if in_place is True: return self else: - return type(self)( - self._data, - self._number_of_rows, - self._row_names, - self._column_names, - self._mcols, - self._metadata, - ) + return self.__copy__() @property def shape(self) -> Tuple[int, int]: @@ -330,7 +323,7 @@ def set_row_names( ValueError: If ``names`` is not unique. Returns: - A modified ``BiocFrame`` object, either as a copy of the original + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if names is not None: @@ -343,7 +336,7 @@ def set_row_names( if not validate_unique_list(names): warn("row names are not unique!") - output = self._setter_copy(in_place) + output = self._define_output(in_place) output._row_names = names return output @@ -354,15 +347,11 @@ def row_names(self) -> Optional[List]: Returns: (List, optional): Row names if available, otherwise None. """ - warn( - "'row_names' is deprecated, use 'get_row_names' instead", - DeprecationWarning, - ) - return self._row_names + return self.get_row_names() @row_names.setter def row_names(self, names: Optional[List]): - """Set new row names. All values in ``names`` must be unique. + """Set new row names. All values in ``names`` must be unique. (in-place operation). 
Args: names (List[str], optional): A list of unique values. @@ -373,21 +362,11 @@ def row_names(self, names: Optional[List]): """ warn( - "Setting property 'row_names' is deprecated, use 'set_row_names' instead", - DeprecationWarning, + "Setting property 'row_names'is an in-place operation, use 'set_row_names' instead", + UserWarning, ) - if names is not None: - if len(names) != self.shape[0]: - raise ValueError( - "Length of `names` does not match the number of rows, need to be " - f"{self.shape[0]} but provided {len(names)}." - ) - - if not validate_unique_list(names): - warn("row names are not unique!") - - self._row_names = names + self.set_row_names(names, in_place=True) @property def data(self) -> Dict[str, Any]: @@ -419,7 +398,7 @@ def set_column_names(self, names: List[str], in_place: bool = False) -> "BiocFra If ``names`` is not unique. Returns: - A modified ``BiocFrame`` object, either as a copy of the original + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ @@ -436,7 +415,7 @@ def set_column_names(self, names: List[str], in_place: bool = False) -> "BiocFra for idx in range(len(names)): new_data[names[idx]] = self._data[self.column_names[idx]] - output = self._setter_copy(in_place) + output = self._define_output(in_place) output._column_names = names output._data = new_data return output @@ -448,15 +427,11 @@ def column_names(self) -> List[str]: Returns: List[str]: A list of column names. """ - warn( - "'column_names' is deprecated, use 'get_column_names' instead", - DeprecationWarning, - ) - return self._column_names + return self.get_column_names() @column_names.setter def column_names(self, names: List[str]): - """Set new column names. New names must be unique. + """Set new column names. New names must be unique (in-place operation). Args: names (List[str]): A list of unique values. 
@@ -468,28 +443,11 @@ def column_names(self, names: List[str]): """ warn( - "Setting property 'column_names' is deprecated, use 'set_column_names' instead", - DeprecationWarning, + "Setting property 'column_names'is an in-place operation, use 'set_column_names' instead", + UserWarning, ) - if names is None: - raise ValueError("`names` cannot be `None`!") - - if len(names) != len(self._column_names): - raise ValueError( - "Length of `names` does not match number of columns, need to be " - f"{len(self._column_names)} but provided {len(names)}." - ) - - if not (validate_unique_list(names)): - raise ValueError("Column names must be unique!") - - new_data = OrderedDict() - for idx in range(len(names)): - new_data[names[idx]] = self._data[self.column_names[idx]] - - self._column_names = names - self._data = new_data + self.set_column_names(names, in_place=True) def get_mcols(self) -> Union[None, "BiocFrame"]: """The ``mcols``, containing annotation on the columns.""" @@ -505,7 +463,7 @@ def set_mcols( in_place (bool): Whether to modify the ``BiocFrame`` object in place. Returns: - A modified ``BiocFrame`` object, either as a copy of the original + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if mcols is not None: @@ -514,31 +472,28 @@ def set_mcols( "Number of rows in `mcols` should be equal to the number of columns." ) - output = self._setter_copy(in_place) + output = self._define_output(in_place) output._mcols = mcols return output @property def mcols(self) -> Union[None, "BiocFrame"]: """The ``mcols``, containing annotation on the columns.""" - warn( - "'mcols' is deprecated, use 'get_mcols' instead", - DeprecationWarning, - ) - return self._mcols + return self.get_mcols() @mcols.setter def mcols(self, mcols: Union[None, "BiocFrame"]): + """Set new mcols (in-place operation). + + Args: + mcols (Union[None, BiocFrame]): New metadata about column to set. 
+ """ warn( - "Setting property 'mcols' is deprecated, use 'set_mcols' instead", - DeprecationWarning, + "Setting property 'mcols'is an in-place operation, use 'set_mcols' instead", + UserWarning, ) - if mcols is not None: - if mcols.shape[0] != self.shape[1]: - raise ValueError( - "Number of rows in `mcols` should be equal to the number of columns." - ) - self._mcols = mcols + + self.set_mcols(mcols, in_place=True) def get_metadata(self) -> dict: """Access metadata. @@ -556,7 +511,7 @@ def set_metadata(self, metadata: dict, in_place: bool = False) -> "BiocFrame": in_place (bool): Whether to modify the ``BiocFrame`` object in place. Returns: - A modified ``BiocFrame`` object, either as a copy of the original + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if not isinstance(metadata, dict): @@ -564,7 +519,7 @@ def set_metadata(self, metadata: dict, in_place: bool = False) -> "BiocFrame": f"`metadata` must be a dictionary, provided {type(metadata)}." ) - output = self._setter_copy(in_place) + output = self._define_output(in_place) output._metadata = metadata return output @@ -575,29 +530,21 @@ def metadata(self) -> dict: Returns: dict: Metadata object. """ - warn( - "'metadata' is deprecated, use 'get_metadata' instead", - DeprecationWarning, - ) - return self._metadata + return self.get_metadata() @metadata.setter def metadata(self, metadata: dict): - """Set new metadata. + """Set new metadata (in-place operation). Args: metadata (dict): New metadata object. """ warn( - "Setting property 'metadata' is deprecated, use 'set_metadata' instead", - DeprecationWarning, + "Setting property 'metadata'is an in-place operation, use 'set_metadata' instead", + UserWarning, ) - if not isinstance(metadata, dict): - raise TypeError( - f"`metadata` must be a dictionary, provided {type(metadata)}." 
- ) - self._metadata = metadata + self.set_metadata(metadata, in_place=True) def has_column(self, name: str) -> bool: """Check whether the column exists in the BiocFrame. @@ -664,16 +611,19 @@ def row(self, index_or_name: Union[str, int]) -> dict: return self[index_or_name, None] - def _slice( + def slice( self, row_indices_or_names: Optional[SlicerTypes] = None, column_indices_or_names: Optional[SlicerTypes] = None, ) -> Union["BiocFrame", dict, list]: - """Internal method to slice by index or values. + """Slice ``BiocFrame`` by index or values. Args: row_indices_or_names (SlicerTypes, optional): Row indices (index positions) - or row names (string) to slice. Defaults to None. + or row names (string) to slice. + + Object must contain :py:attr:`biocframe.BiocFrame.BiocFrame.row_names` to slice by names. + Defaults to None. column_indices_or_names (SlicerTypes, optional): Column indices (index positions) or column names (string) to slice. Defaults to None. @@ -739,7 +689,7 @@ def _slice( mcols = self._mcols if mcols is not None: if column_indices_or_names is not None: - mcols = mcols._slice(new_column_indices, None) + mcols = mcols.slice(new_column_indices, None) current_class_const = type(self) return current_class_const( @@ -823,21 +773,21 @@ def __getitem__( # not an array, single str, slice by column if isinstance(args, str): - return self._slice(None, args) + return self.slice(None, args) if isinstance(args, int): - return self._slice(args, None) + return self.slice(args, None) # not an array, a slice if isinstance(args, slice): - return self._slice(args, None) + return self.slice(args, None) if isinstance(args, list): # column names if everything is a string if is_list_of_type(args, str): - return self._slice(None, args) + return self.slice(None, args) elif is_list_of_type(args, int): - return self._slice(args, None) + return self.slice(args, None) else: raise TypeError( "Arguments not supported! 
Since slice is a list, must contain either list of column " @@ -850,9 +800,9 @@ def __getitem__( raise ValueError("Arguments must specify atleast a single slice!") if len(args) == 1: - return self._slice(args[0], None) + return self.slice(args[0], None) elif len(args) == 2: - return self._slice( + return self.slice( args[0], args[1], ) @@ -863,9 +813,8 @@ def __getitem__( raise TypeError("Provided slice arguments are not supported!") - # TODO: implement in-place or views def __setitem__(self, args, value: Union[List, "BiocFrame"]): - """Add or re-assign a value to a column. + """Add or re-assign a value to a column (in-place operation). Usage: @@ -892,6 +841,11 @@ def __setitem__(self, args, value: Union[List, "BiocFrame"]): Raises: ValueError: If the length of ``value`` does not match the number of rows. """ + warn( + "This method perform an in-place operation, use 'set_column' instead", + UserWarning, + ) + if isinstance(args, tuple): rows, cols = args @@ -931,7 +885,7 @@ def __setitem__(self, args, value: Union[List, "BiocFrame"]): self._data[args] = value def __delitem__(self, name: str): - """Remove a column. + """Remove a column (in-place operation). Usage: @@ -956,18 +910,106 @@ def __delitem__(self, name: str): Raises: ValueError: If `name` is not a valid column. """ + warn( + "This method perform an in-place operation, use 'remove_column' instead", + UserWarning, + ) + self.remove_column(name, in_place=True) + + def set_column( + self, + args: SlicerArgTypes, + value: Union[List, "BiocFrame"], + in_place: bool = False, + ) -> "BiocFrame": + """Set or Modify a column. + + Args: + args (SlicerArgTypes): Name of the column. + value (Union[List, "BiocFrame"]): New value to set. + in_place (bool): Whether to modify the object in place. Defaults to False. + + Raises: + TypeError: If row indices are not a sequence or slice. + ValueError: If length of `value` does not match the number of rows. 
+ + Returns: + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. + """ + output = self._define_output(in_place) + if isinstance(args, tuple): + rows, cols = args + + row_idx, scalar = normalize_subscript( + rows, output.shape[0], names=output._row_names + ) + if scalar: + raise TypeError("row indices should be a sequence or slice") + + col_idx, scalar = normalize_subscript( + cols, output.shape[1], names=output._column_names + ) + if scalar: + current = output._data[output._column_names[col_idx[0]]] + for j, k in enumerate(row_idx): + current[k] = value[j] + else: + for i in col_idx: + nm = output._column_names[i] + current = output._data[nm] + replacement = value._data[nm] + for j, k in enumerate(row_idx): + current[k] = replacement[j] + else: + if len(value) != output.shape[0]: + raise ValueError( + "Length of `value`, does not match the number of the rows," + f"need to be {output.shape[0]} but provided {len(value)}." + ) + + if args not in output.column_names: + output._column_names.append(args) + + if output._mcols is not None: + output._mcols = output._mcols.combine( + BiocFrame({}, number_of_rows=1) + ) + + output._data[args] = value + + return output + + def remove_column(self, name: str, in_place: bool = False) -> "BiocFrame": + """Remove a column. + + Args: + name (str): Name of the column to remove. + in_place (bool): Whether to modify the object in place. Defaults to False. + + Raises: + ValueError: If column does not exist. + + Returns: + BiocFrame: A modified ``BiocFrame`` object, either as a copy of the original + or as a reference to the (in-place-modified) original. 
+ """ if name not in self.column_names: raise ValueError(f"Column: '{name}' does not exist.") - del self._data[name] - _col_idx = self._column_names.index(name) + output = self._define_output(in_place) + + del output._data[name] + _col_idx = output._column_names.index(name) # TODO: do something better later! - _indices = [i for i in range(len(self._column_names)) if i != _col_idx] + _indices = [i for i in range(len(output._column_names)) if i != _col_idx] + + output._column_names.remove(name) + if output._mcols is not None: + output._mcols = output._mcols[_indices, :] - self._column_names.remove(name) - if self._mcols is not None: - self._mcols = self._mcols[_indices, :] + return output def __len__(self) -> int: """Number of rows. From 009e2746d75ebfc73433e45c575637ec92f4a66c Mon Sep 17 00:00:00 2001 From: jkanche Date: Mon, 30 Oct 2023 19:13:04 -0700 Subject: [PATCH 03/10] for backwards compatibility --- src/biocframe/BiocFrame.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index 72e9cf8..e4a7ec4 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -1097,7 +1097,7 @@ def columns(self) -> list: Returns: list: List of column names. """ - return self.column_names + return self.get_column_names() @property def index(self) -> Optional[list]: @@ -1106,7 +1106,7 @@ def index(self) -> Optional[list]: Returns: (list, optional): List of row names if available, otherwise None. """ - return self.row_names + return self.get_row_names() # compatibility with R interfaces @property @@ -1116,7 +1116,15 @@ def rownames(self) -> Optional[list]: Returns: (list, optional): List of row names if available, otherwise None. """ - return self.row_names + return self.get_row_names() + + @rownames.setter + def rownames(self, names: list): + """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names` (in-place operation). + Args: + names (list): New row names. 
+ """ + return self.set_row_names(names, in_place=True) @property def colnames(self) -> list: @@ -1125,7 +1133,15 @@ def colnames(self) -> list: Returns: list: list of column names. """ - return self.column_names + self.get_column_names() + + @colnames.setter + def colnames(self, names: list): + """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names` (in-place operation). + Args: + names (list): New column names. + """ + self.set_column_names(names, in_place=True) @property def dims(self) -> Tuple[int, int]: From 24cc684fc2ae94b192143c464478c72ea1e4e940 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 02:13:14 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/biocframe/BiocFrame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index e4a7ec4..5afd061 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -1121,6 +1121,7 @@ def rownames(self) -> Optional[list]: @rownames.setter def rownames(self, names: list): """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names` (in-place operation). + Args: names (list): New row names. """ @@ -1138,6 +1139,7 @@ def colnames(self) -> list: @colnames.setter def colnames(self, names: list): """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names` (in-place operation). + Args: names (list): New column names. 
""" From 250dbec5a947a3c1e17e861e1267caf3b30bcbff Mon Sep 17 00:00:00 2001 From: jkanche Date: Mon, 30 Oct 2023 19:17:27 -0700 Subject: [PATCH 05/10] missing return --- src/biocframe/BiocFrame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index 5afd061..1c31f7c 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -1134,7 +1134,7 @@ def colnames(self) -> list: Returns: list: list of column names. """ - self.get_column_names() + return self.get_column_names() @colnames.setter def colnames(self, names: list): From 533ddf4a8bfeb7c00b739b44d78ab0c095127a31 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 31 Oct 2023 07:28:29 -0700 Subject: [PATCH 06/10] update docstrings, add props setters with warnings. --- src/biocframe/BiocFrame.py | 3 +- src/biocframe/Factor.py | 103 ++++++++++++++++++++++++++++++------- 2 files changed, 86 insertions(+), 20 deletions(-) diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py index 1c31f7c..95112ec 100644 --- a/src/biocframe/BiocFrame.py +++ b/src/biocframe/BiocFrame.py @@ -57,7 +57,8 @@ def __next__(self): class BiocFrame: - """`BiocFrame` is an alternative to :class:`~pandas.DataFrame`. + """`BiocFrame` is an alternative to :class:`~pandas.DataFrame`, with support for nested and flexible column types. + Similar to R's ``DataFrame`` class. Columns are required to implement the length (``__len__``) and slice (``__getitem__``) dunder methods. 
This allows :py:class:`~biocframe.BiocFrame.BiocFrame` to accept nested diff --git a/src/biocframe/Factor.py b/src/biocframe/Factor.py index 002f6aa..8fb1152 100644 --- a/src/biocframe/Factor.py +++ b/src/biocframe/Factor.py @@ -1,5 +1,6 @@ from copy import deepcopy from typing import List, Sequence, Union +from warnings import warn import biocutils as ut from biocgenerics.combine import combine @@ -19,7 +20,8 @@ def __init__( ordered: bool = False, validate: bool = True, ): - """ + """Initialize a Factor object. + Args: codes: List of codes. Each value should be a non-negative @@ -63,29 +65,76 @@ def __init__( raise ValueError("all entries of 'levels' should be unique") def get_codes(self) -> List[int]: - """ + """Get list of codes. + Returns: List of codes, used as indices into the levels from :py:attr:`~get_levels`. Values may also be None. """ return self._codes - def get_levels(self) -> List[str]: + @property + def codes(self) -> List[int]: + """List of codes, used as indices into the levels from + :py:attr:`~get_levels`. Values may also be None (read-only property). + + Returns: + List[int]: List of codes. """ + return self.get_codes() + + def get_levels(self) -> List[str]: + """Get unique factor levels. + Returns: - List of unique factor levels. + List[str]: List of factor levels. """ return self._levels - def get_ordered(self) -> bool: + @property + def levels(self) -> List[str]: + """Get list of unique factor levels.""" + return self.get_levels() + + @levels.setter + def levels(self, levels: Union[str, List[str]]): + """Modify levels in the list (in-place operation). + + Args: + levels (Union[str, List[str]]): A list of replacement levels. These should be unique strings + with no missing values. + + Alternatively a single string containing an existing level in + this object. The new levels are defined as a permutation of the + existing levels where the provided string is now the first + level. The order of all other levels is preserved. 
""" + warn( + "Setting property 'levels'is an in-place operation, use 'set_levels' instead", + UserWarning, + ) + self.set_levels(levels, in_place=True) + + def get_ordered(self) -> bool: + """Get whether the levels are ordered. + Returns: - Whether the levels are ordered. + bool: True if ordered, otherwise False. """ return self._ordered - def __len__(self) -> int: + @property + def ordered(self) -> bool: + """Get whether the levels are ordered (read-only). + + Returns: + bool: True if ordered, otherwise False. """ + return self.get_ordered() + + def __len__(self) -> int: + """Get length. + Returns: Length of the factor in terms of the number of codes. """ @@ -129,14 +178,15 @@ def __str__(self) -> str: return message def __getitem__(self, args: Union[int, Sequence[int]]) -> Union[str, "Factor"]: - """ + """Subset the ``Factor`` list. + Args: args: Sequence of integers specifying the elements of interest. Alternatively an integer specifying a single element. Returns: - If ``args`` is a sequence, a new ``Factor`` is returned containing + If ``args`` is a sequence, returns same type as caller (a bew ``Factor``) containing only the elements of interest from ``args``. If ``args`` is an integer, a string is returned containing the @@ -153,10 +203,15 @@ def __getitem__(self, args: Union[int, Sequence[int]]) -> Union[str, "Factor"]: new_codes = [] for i in args: new_codes.append(self._codes[i]) - return Factor(new_codes, self._levels, self._ordered, validate=False) + + current_class_const = type(self) + return current_class_const( + new_codes, self._levels, self._ordered, validate=False + ) def __setitem__(self, args: Sequence[int], value: "Factor"): - """ + """Modify the ``Factor`` list. + Args: args: Sequence of integers specifying the elements to be replaced. 
@@ -190,7 +245,8 @@ def __setitem__(self, args: Sequence[int], value: "Factor"): self._codes[x] = mapping[value._codes[i]] def drop_unused_levels(self, in_place: bool = False) -> "Factor": - """ + """Drop unused levels. + Args: in_place: Whether to perform this modification in-place. @@ -227,12 +283,16 @@ def drop_unused_levels(self, in_place: bool = False) -> "Factor": self._levels = new_levels return self else: - return Factor(new_codes, new_levels, self._ordered, validate=False) + current_class_const = type(self) + return current_class_const( + new_codes, new_levels, self._ordered, validate=False + ) def set_levels( self, levels: Union[str, List[str]], in_place: bool = False ) -> "Factor": - """ + """Set or replace levels. + Args: levels: A list of replacement levels. These should be unique strings @@ -292,21 +352,28 @@ def set_levels( self._levels = new_levels return self else: - return Factor(new_codes, new_levels, self._ordered, validate=False) + current_class_const = type(self) + return current_class_const( + new_codes, new_levels, self._ordered, validate=False + ) def __copy__(self) -> "Factor": """ Returns: A shallow copy of the ``Factor`` object. """ - return Factor(self._codes, self._levels, self._ordered, validate=False) + current_class_const = type(self) + return current_class_const( + self._codes, self._levels, self._ordered, validate=False + ) def __deepcopy__(self, memo) -> "Factor": """ Returns: A deep copy of the ``Factor`` object. """ - return Factor( + current_class_const = type(self) + return current_class_const( deepcopy(self._codes, memo), deepcopy(self._levels, memo), self._ordered, @@ -320,7 +387,6 @@ def to_pandas(self): Categorical: A :py:class:`~pandas.Categorical` object. """ from pandas import Categorical - return Categorical( values=[self._levels[c] for c in self._codes], ordered=self._ordered, @@ -340,7 +406,6 @@ def from_list(values: Sequence[str]) -> "Factor": Factor: A Factor object. 
""" levels, indices = ut.factor(values) - return Factor(indices, levels=levels) From 061e97e513c878e9065fbcc622bf4ebc68072836 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:28:43 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/biocframe/Factor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/biocframe/Factor.py b/src/biocframe/Factor.py index 8fb1152..8dbbf79 100644 --- a/src/biocframe/Factor.py +++ b/src/biocframe/Factor.py @@ -75,8 +75,8 @@ def get_codes(self) -> List[int]: @property def codes(self) -> List[int]: - """List of codes, used as indices into the levels from - :py:attr:`~get_levels`. Values may also be None (read-only property). + """List of codes, used as indices into the levels from :py:attr:`~get_levels`. Values may also be None (read- + only property). Returns: List[int]: List of codes. @@ -387,6 +387,7 @@ def to_pandas(self): Categorical: A :py:class:`~pandas.Categorical` object. """ from pandas import Categorical + return Categorical( values=[self._levels[c] for c in self._codes], ordered=self._ordered, From bc3f3224ef5ff3523262a9a5511a6deb54033289 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 31 Oct 2023 07:51:11 -0700 Subject: [PATCH 08/10] update README --- README.md | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2eb1c21..929c93a 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,15 @@ # BiocFrame -This package provides `BiocFrame` class, an alternative to Pandas DataFrame's. +This package provides -`BiocFrame` makes no assumption on the types of the columns, the minimum requirement is each column implements length: `__len__` and slice: `__getitem__` dunder methods. 
This allows `BiocFrame` to accept nested representations or any supported class as columns. +- `BiocFrame` class, an alternative to Pandas `DataFrame`. + + `BiocFrame` makes no assumption on the types of the columns, the minimum requirement is each column implements length: `__len__` and slice: `__getitem__` dunder methods. This allows `BiocFrame` to accept nested representations or any supported class as columns. + +- `Factor` class, equivalent to R's `factor`. + + The aim is to encode a list of strings as integers for easier numerical analysis. To get started, install the package from [PyPI](https://pypi.org/project/biocframe/) @@ -27,7 +33,7 @@ To get started, install the package from [PyPI](https://pypi.org/project/biocfra pip install biocframe ``` -## Usage +## BiocFrame To construct a `BiocFrame` object, simply provide the data as a dictionary. @@ -189,6 +195,25 @@ combined = bframe1.combine(bframe2) For more details, check out the BiocFrame class [reference](https://biocpy.github.io/BiocFrame/api/biocframe.html#biocframe.BiocFrame.BiocFrame). +## Factor + +Convert a list into a `Factor` object: + +```python +from biocframe import Factor + +f1 = Factor.from_list(["A", "B", "A", "B", "E"]) +print(f1) +``` + + ## output + Factor of length 5 with 3 levels + values: ['A', 'B', 'A', 'B', 'E'] + levels: ['A', 'B', 'E'] + ordered: False + + +The `Factor` class behaves like a list, and most slicing and replacement operations work on it. Check out the docs for more information!
From 4f69260be43139d3975dde0a09daa7eb8a60ea30 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:51:35 +0000 Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 929c93a..7dc5d99 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ # BiocFrame -This package provides +This package provides - `BiocFrame` class, an alternative to Pandas `DataFrame`. - + `BiocFrame` makes no assumption on the types of the columns, the minimum requirement is each column implements length: `__len__` and slice: `__getitem__` dunder methods. This allows `BiocFrame` to accept nested representations or any supported class as columns. - `Factor` class, equivalent to R's `factor`. From f839d5794a572845d947df6c9e77445dbd5d1b2c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 31 Oct 2023 08:33:26 -0700 Subject: [PATCH 10/10] Update docstring --- src/biocframe/Factor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/biocframe/Factor.py b/src/biocframe/Factor.py index 8dbbf79..55303a8 100644 --- a/src/biocframe/Factor.py +++ b/src/biocframe/Factor.py @@ -186,7 +186,7 @@ def __getitem__(self, args: Union[int, Sequence[int]]) -> Union[str, "Factor"]: Alternatively an integer specifying a single element. Returns: - If ``args`` is a sequence, returns same type as caller (a bew ``Factor``) containing + If ``args`` is a sequence, returns same type as caller (a new ``Factor``) containing only the elements of interest from ``args``. If ``args`` is an integer, a string is returned containing the @@ -251,7 +251,7 @@ def drop_unused_levels(self, in_place: bool = False) -> "Factor": in_place: Whether to perform this modification in-place. 
Returns: - If ``in_place = False``, a new ``Factor`` object is returned + If ``in_place = False``, returns same type as caller (a new ``Factor`` object) where all unused levels have been removed. If ``in_place = True``, unused levels are removed from the @@ -307,7 +307,7 @@ def set_levels( Whether to perform this modification in-place. Returns: - If ``in_place = False``, a new ``Factor`` object is returned where + If ``in_place = False``, returns same type as caller (a new ``Factor`` object) where the levels have been replaced. This will automatically adjust the codes so that they still refer to the same level in the new ``levels``. If a code refers to a level that is not present in the