From 9ece5ff85e539ac513e784a0665c14e1b579fe36 Mon Sep 17 00:00:00 2001
From: Aaron Lun
Date: Mon, 30 Oct 2023 13:11:22 -0700
Subject: [PATCH] Support partial replacement via row indices in BiocFrame.__setitem__. (#58)

* Support partial replacement via row indices in BiocFrame.__setitem__.
---
Review revision of the original commit: partial replacement now checks the
replacement lengths before writing anything, and the docstring describes tuple
subscripts. Leading whitespace was reconstructed from the Python structure, and
the post-image hash on the BiocFrame.py "index" line predates this revision.

 src/biocframe/BiocFrame.py | 80 +++++++++++++++++++++++++++++---------
 tests/test_methods.py      | 26 +++++++++++++
 2 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 4c93042..0d90f83 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -684,7 +684,7 @@ def __getitem__(
         raise TypeError("Provided slice arguments are not supported!")
 
     # TODO: implement in-place or views
-    def __setitem__(self, name: str, value: List):
-        """Add or re-assign a value to a column.
+    def __setitem__(self, args, value: Union[List, "BiocFrame"]):
+        """Add or re-assign a column, or replace selected rows of existing columns.
 
         Usage:
@@ -706,25 +706,69 @@ def __setitem__(self, name: str, value: List):
             bframe["symbol"] = ["gene_a", "gene_b", "gene_c"]
 
         Args:
-            name (str): Name of the column.
-            value (List): New value to set.
+            args: Either a column name (``str``), to add or replace a whole
+                column; or a ``(rows, columns)`` tuple, to overwrite only the
+                selected rows of existing columns in place. ``rows`` must be
+                a sequence or slice.
+            value (Union[List, BiocFrame]): New value to set. For a column
+                name, one entry per row. For ``(rows, column)`` with a single
+                column, one entry per selected row. For several columns, a
+                ``BiocFrame`` holding each selected column with one entry
+                per selected row.
 
         Raises:
-            ValueError: If the length of ``value`` does not match the number of rows.
+            TypeError: If the row subscript in ``args`` is a scalar.
+            ValueError: If the length of ``value`` does not match the number of
+                rows, or of selected rows for a partial replacement.
         """
-        if len(value) != self.shape[0]:
-            raise ValueError(
-                "Length of `value`, does not match the number of the rows,"
-                f"need to be {self.shape[0]} but provided {len(value)}."
-            )
-
-        if name not in self.column_names:
-            self._column_names.append(name)
-
-            if self._mcols is not None:
-                self._mcols = self._mcols.combine(BiocFrame({}, number_of_rows=1))
-
-        self._data[name] = value
+        if isinstance(args, tuple):
+            rows, cols = args
+
+            row_idx, scalar = normalize_subscript(
+                rows, self.shape[0], names=self._row_names
+            )
+            if scalar:
+                raise TypeError("row indices should be a sequence or slice")
+            row_idx = list(row_idx)  # sized, so replacement lengths can be checked
+
+            col_idx, scalar = normalize_subscript(
+                cols, self.shape[1], names=self._column_names
+            )
+            if scalar:
+                replacements = {self._column_names[col_idx[0]]: value}
+            else:
+                names = [self._column_names[i] for i in col_idx]
+                replacements = {nm: value._data[nm] for nm in names}
+
+            # Check every replacement before writing anything, so that a
+            # mismatched `value` cannot leave the frame partially updated or
+            # have its surplus entries silently dropped.
+            for nm, replacement in replacements.items():
+                if len(replacement) != len(row_idx):
+                    raise ValueError(
+                        f"Length of replacement for column '{nm}' does not match "
+                        f"the number of selected rows, need to be {len(row_idx)} "
+                        f"but provided {len(replacement)}."
+                    )
+
+            for nm, replacement in replacements.items():
+                current = self._data[nm]
+                for j, k in enumerate(row_idx):
+                    current[k] = replacement[j]
+        else:
+            if len(value) != self.shape[0]:
+                raise ValueError(
+                    "Length of `value`, does not match the number of the rows,"
+                    f"need to be {self.shape[0]} but provided {len(value)}."
+                )
+
+            if args not in self.column_names:
+                self._column_names.append(args)
+
+                if self._mcols is not None:
+                    self._mcols = self._mcols.combine(BiocFrame({}, number_of_rows=1))
+
+            self._data[args] = value
 
     def __delitem__(self, name: str):
         """Remove a column.
diff --git a/tests/test_methods.py b/tests/test_methods.py
index 3a93d9f..4fdf04e 100644
--- a/tests/test_methods.py
+++ b/tests/test_methods.py
@@ -122,6 +122,32 @@ def test_bframe_setters():
     assert bframe.dims == (3, 4)
 
 
+def test_bframe_setters_with_rows():
+    def fresh():
+        # Build new lists for every scenario: row replacement writes into the
+        # stored columns, so shared inputs could leak state between scenarios.
+        return BiocFrame(
+            {"column1": [1, 2, 3, 4, 5], "column2": ["b", "n", "m", "a", "c"]}
+        )
+
+    bframe = fresh()
+    bframe[1:3, "column1"] = [20, 30]
+    assert bframe.column("column1") == [1, 20, 30, 4, 5]
+    assert bframe.column("column2") == ["b", "n", "m", "a", "c"]
+
+    replacement = {"column1": [20, 30], "column2": ["E", "F"]}
+    bframe = fresh()
+    bframe[1:3, ["column1", "column2"]] = BiocFrame(replacement)
+    assert bframe.column("column1") == [1, 20, 30, 4, 5]
+    assert bframe.column("column2") == ["b", "E", "F", "a", "c"]
+
+    # Works even when columns are out of order.
+    bframe = fresh()
+    bframe[1:3, ["column2", "column1"]] = BiocFrame(replacement)
+    assert bframe.column("column1") == [1, 20, 30, 4, 5]
+    assert bframe.column("column2") == ["b", "E", "F", "a", "c"]
+
+
 def test_bframe_setters_should_fail():
     obj = {
         "column1": [1, 2, 3],