From af70930a33717de819ee8824847b9154af2c3719 Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Tue, 14 Nov 2023 11:05:39 -0800 Subject: [PATCH] Overhaul implementation of NamedLists and StringLists. (#12) We no longer inherit from a list, so as to avoid complications with the base types. We also refactor the StringList class so that it is now based on NamedList, thus giving it access to names. (We make sure to prevent naming of Factor levels, though, as that is unnecessarily complicated.) --- src/biocutils/Factor.py | 2 + src/biocutils/NamedList.py | 481 +++++++++++++++++++++++++----------- src/biocutils/StringList.py | 209 +++++----------- tests/test_Factor.py | 20 +- tests/test_NamedList.py | 233 ++++++++++++----- tests/test_StringList.py | 89 ++++--- 6 files changed, 623 insertions(+), 411 deletions(-) diff --git a/src/biocutils/Factor.py b/src/biocutils/Factor.py index 510c31d..d6431cc 100644 --- a/src/biocutils/Factor.py +++ b/src/biocutils/Factor.py @@ -57,6 +57,8 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool = if not isinstance(levels, StringList): levels = StringList(levels) + if levels.get_names() is not None: + levels = levels.set_names(None) self._codes = codes self._levels = levels diff --git a/src/biocutils/NamedList.py b/src/biocutils/NamedList.py index 40ee11e..e30d069 100644 --- a/src/biocutils/NamedList.py +++ b/src/biocutils/NamedList.py @@ -2,62 +2,138 @@ from copy import deepcopy from .Names import Names +from .normalize_subscript import normalize_subscript from .subset_sequence import subset_sequence from .combine_sequences import combine_sequences from .assign_sequence import assign_sequence -class NamedList(list): +def _name_to_position(names: Optional[Names], index: str) -> int: + i = -1 + if names is not None: + i = names.map(index) + if i < 0: + raise KeyError("failed to find entry with name '" + index + "'") + return i + + +def _sanitize_names(names: Optional[Names], length: int) -> Union[None, Names]: + 
if names is None: + return names + if not isinstance(names, Names): + names = Names(names) + if len(names) != length: + raise ValueError("length of 'names' must be equal to number of entries (" + str(length) + ")") + return names + + +class NamedList: """ - A Python list with a name for each element, equivalent to R's named list. - This provides some dict-like behavior - namely, getting or setting entries - by an existing name, adding entries with a new name. Of course, it's still - a list, so it can be indexed as usual by integer positions or slices. + A list-like object that could have names for each element, equivalent to R's + named list. This combines list and dictionary functionality, e.g., it can + be indexed by position or slices (list) but also by name (dictionary). """ - def __init__(self, iterable: Optional[Union[Iterable, Dict]] = None, names: Optional[Names] = None): + def __init__(self, data: Optional[Iterable] = None, names: Optional[Names] = None, _validate: bool = True): """ Args: - iterable: - Some iterable object. - - Alternatively, a dictionary where the keys are strings. + data: + Sequence of data values. Alternatively None, for an empty list. names: - List of names. This should have same length as ``iterable``. - If None, defaults to an empty list. + List of names. This should have same length as ``data``. + Alternatively None, if the list has no valid names yet. + + _validate: + Internal use only. + """ + if _validate: + if data is None: + data = [] + elif isinstance(data, NamedList): + data = data._data + elif not isinstance(data, list): + data = list(data) + names = _sanitize_names(names, len(data)) + self._data = data + self._names = names - Ignored if ``iterable`` is a dictionary, in which case the - keys are used directly as the names. + def __len__(self) -> int: + """ + Returns: + Length of the list. + """ + return len(self._data) + + def __repr__(self) -> str: + """ + Returns: + Representation of the current list. 
""" - if isinstance(iterable, dict): - original = iterable - iterable = original.values() - names = (str(y) for y in original.keys()) + message = type(self).__name__ + "(data=" + repr(self._data) + if self._names is not None: + message += ", names=" + repr(self._names) + message += ")" + return message - if iterable is None: - super().__init__() + def __str__(self) -> str: + """ + Returns: + Pretty-printed representation of the current list, along with its + names if any exist. + """ + if self._names is not None: + return "[" + ", ".join(repr(self._names[i]) + "=" + repr(x) for i, x in enumerate(self._data)) + "]" else: - super().__init__(iterable) + return repr(self._data) - if names is None: - if isinstance(iterable, NamedList): - names = iterable._names - else: - names = Names() - elif not isinstance(names, Names): - names = Names(names) - self._names = names - if len(self) != len(self._names): - raise ValueError("length of 'names' should equal the length of 'data'") + def __eq__(self, other: "NamedList") -> bool: + """ + Args: + other: Another ``NamedList``. - def __repr__(self): - return "NamedList(data=" + super().__repr__() + ", names=" + repr(self._names) + ")" + Returns: + Whether the current object is equal to ``other``, i.e., + same data and names. + """ + return self.get_data() == other.get_data() and self.get_names() == other.get_names() - def __str__(self): - return "[" + ", ".join(repr(self._names[i]) + "=" + repr(x) for i, x in enumerate(self)) + "]" + def get_data(self) -> list: + """ + Returns: + The underlying list of elements. + """ + return self._data + + @property + def data(self) -> list: + """Alias for :py:attr:`~get_data`.""" + return self.get_data() + + def set_data(self, data: Sequence, in_place: bool = False) -> "NamedList": + """ + Args: + data: + Replacement list of elements. This should have the same length + as the current object. + + in_place: + Whether to modify the current object in place. 
+ + Returns: + A modified ``NamedList``, either as a new object or a reference to + the current object. + """ + if len(data) != len(self): + raise ValueError("replacement 'data' must be of the same length") + if in_place: + output = self + else: + output = self.copy() + output._data = data + return output def get_names(self) -> Names: """ @@ -71,7 +147,7 @@ def names(self) -> Names: """Alias for :py:attr:`~get_names`.""" return self.get_names() - def set_names(self, names: Names, in_place: bool = False) -> "NamedList": + def set_names(self, names: Optional[Names], in_place: bool = False) -> "NamedList": """ Args: names: @@ -85,83 +161,175 @@ def set_names(self, names: Names, in_place: bool = False) -> "NamedList": False``, this is a new ``NamedList``, otherwise it is a reference to the current ``NamedList``. """ - if isinstance(names, Names): - names = Names(names) if in_place: - if len(names) != len(self._data): - raise ValueError("length of 'names' should equal the length of 'data'") - self._names = names - return self + output = self else: - return NamedList(self, names) + output = self.copy() + output._names = _sanitize_names(names, len(self)) + return output - def __getitem__(self, index: Union[str, int, slice]): + def get_value(self, index: Union[str, int]) -> Any: """ Args: index: - An integer index containing a position to extract, a string - specifying the name of the value to extract, or a slice - specifying multiple positions to extract. + Integer index of the element to obtain. Alternatively, a string + containing the name of the element, using the first occurrence + if duplicate names are present. Returns: - If ``index`` is an integer, the value at the specified position. + The value at the specified position (or with the specified name). + """ + if isinstance(index, str): + index = _name_to_position(self._names, index) + return self._data[index] - If ``index`` is a string, the value with the specified name. 
If - multiple values have the same name, the first is returned. + def get_slice(self, index: Union[str, int, bool, Sequence]) -> "NamedList": + """ + Args: + index: + Subset of elements to obtain, see + :py:func:`~normalize_subscript.normalize_subscript` for + details. Strings are matched to names in the current object, + using the first occurrence if duplicate names are present. + Scalars are treated as length-1 vectors. - If ``index`` is a slice, a new ``NamedList`` is returned - containing the items at the specified positions. + Returns: + A ``NamedList`` is returned containing the specified subset. """ - if isinstance(index, str): - i = self._names.map(index) - if i < 0: - raise KeyError("no list element named '" + index + "'") - return super().__getitem__(i) - - output = super().__getitem__(index) - if isinstance(index, slice): - return NamedList(output, self._names[index]) - return output + index, scalar = normalize_subscript(index, len(self), self._names) + outdata = subset_sequence(self._data, index) + outnames = None + if self._names is not None: + outnames = subset_sequence(self._names, index) + return type(self)(outdata, outnames, _validate=False) + + def __getitem__(self, index: Union[str, int, bool, Sequence]) -> Union["NamedList", Any]: + """ + If ``index`` is a scalar, this is an alias for :py:attr:`~get_value`. + If ``index`` is a sequence, this is an alias for :py:attr:`~get_slice`. + """ + index, scalar = normalize_subscript(index, len(self), self._names) + if scalar: + return self.get_value(index[0]) + else: + return self.get_slice(index) - def __setitem__(self, index: Union[int, str, slice], item: Any): + def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> "NamedList": """ Args: index: - An integer index containing a position to set, a string - specifying the name of the value to set, or a slice specifying - multiple positions to set. + Integer index of the element to replace.
Alternatively, a string + containing the name of the element; we consider the first + occurrence of the name if duplicates are present. - item: - If ``index`` is an integer or string, a value to be set at the - corresponding position of this ``NamedList``. + value: + Replacement value of the list element. - If ``index`` is a slice, an iterable of the same length - containing values to be set at the sliced positions. If - ``item`` is a ``NamedList``, the names are also transferred. + in_place: + Whether to perform the replacement in place. Returns: - In the current object, the specified item(s) at ``index`` are - replaced with the contents of ``item``. - - If ``index`` is a string that does not exist in the names, it is - appended to the names and ``item`` is appended to the list. - """ - if isinstance(index, slice): - super().__setitem__(index, item) - if isinstance(item, type(self)): - self._names[index] = item._names - elif isinstance(index, str): - i = self._names.map(index) - if i >= 0: - return super().__setitem__(i, item) + A ``NamedList`` is returned after the value at the specified position + (or with the specified name) is replaced. If ``in_place = False``, this + is a new object, otherwise it is a reference to the current object. + + If ``index`` is a name that does not already exist in the current + object, ``value`` is added to the end of the list, and the + ``index`` is added as a new name. 
+ """ + if in_place: + output = self + else: + output = self.copy() + output._data = output._data.copy() + + if isinstance(index, str): + if self._names is not None: + i = self._names.map(index) + if i < 0: + output._names = self._names.copy() + output._names.append(index) + output._data.append(value) + else: + output._data[i] = value else: - super().append(item) - self._names.append(index) + output._names = Names([""] * len(output._data)) + output._names.append(index) + output._data.append(value) else: - super().__setitem__(index, item) + output._data[index] = value - def insert(self, index: Union[int, str], item: Any): + return output + + def set_slice(self, index: Union[int, str, slice], value: Sequence, in_place: bool = False) -> "NamedList": + """ + Args: + index: + Subset of elements to replace, see + :py:func:`~normalize_subscript.normalize_subscript` for + details. Strings are matched to names in the current object, + using the first occurrence if duplicate names are present. + + value: + If ``index`` is a sequence, a sequence of the same length + containing values to be set at the positions in ``index``. + + If ``index`` is a scalar, any object to be used as the + replacement value for the position at ``index``. + + in_place: + Whether to perform the replacement in place. + + Returns: + A ``NamedList`` where the entries at ``index`` are replaced with + the contents of ``value``. If ``in_place = False``, this is a new + object, otherwise it is a reference to the current object. + + Unlike :py:attr:`~set_value`, this will not add new elements if + ``index`` contains names that do not already exist in the object; + a missing name error is raised instead. 
+ """ + index, scalar = normalize_subscript(index, len(self), self._names) + if in_place: + output = self + else: + output = self.copy() + output._data = output._data.copy() + if scalar: + output._data[index[0]] = value + else: + for i, j in enumerate(index): + output._data[j] = value[i] + return output + + def __setitem__(self, index: Union[int, str, slice], value: Any): + """ + If ``index`` is a scalar, this is an alias for :py:attr:`~set_value` + with ``in_place = True``. + + If ``index`` is a sequence, this is an alias for :py:attr:`~set_slice` + with ``in_place = True``. + """ + if isinstance(index, str): + self.set_value(index, value, in_place=True) + else: + index, scalar = normalize_subscript(index, len(self), self._names) + if scalar: + self.set_value(index[0], value, in_place=True) + else: + self.set_slice(index, value, in_place=True) + + def _define_output(self, in_place: bool) -> "NamedList": + if in_place: + return self + newdata = self._data.copy() + newnames = None + if self._names is not None: + newnames = self._names.copy() + return type(self)(newdata, names=newnames, _validate=False) + + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> "NamedList": """ Args: index: @@ -169,34 +337,56 @@ def insert(self, index: Union[int, str], item: Any): Alternatively, the name of the value to insert at (the first occurrence of each name is used). - item: - A scalar that can be coerced into a string, or None. + value: + A value to be inserted into the current object. + + in_place: + Whether to modify the current object in place. Returns: - ``item`` is inserted at ``index`` in the current object. + A ``NamedList`` where ``value`` is inserted at ``index``. This is a + new object if ``in_place = False``, otherwise it is a reference to + the current object. If names are present in the current object, the + newly inserted element's name is set to an empty string.
""" + output = self._define_output(in_place) if isinstance(index, str): - i = self._names.map(index) - if i < 0: - raise KeyError("no list element named '" + index + "'") - index = i - super().insert(index, item) - self._names.insert(index, "") + index = _name_to_position(self._names, index) + output._data.insert(index, value) + if output._names is not None: + output._names.insert(index, "") + return output + + def insert(self, index: Union[int, str], value: Any): + """Alias for :py:attr:`~safe_insert` with `in_place = True`.""" + self.safe_insert(index, value, in_place=True) - def append(self, item: Any): + def safe_append(self, value: Any, in_place: bool = False) -> "NamedList": """ Args: - item: + value: Any value. + in_place: + Whether to perform the modification in place. + Returns: - ``item`` is added to the end of the current object, with its name - set to an empty string. + A ``NamedList`` where ``value`` is added to the end. If ``in_place + = False``, this is a new object, otherwise it is a reference to the + current object. If names are present in the current object, the + newly added element has its name set to an empty string. """ - self._names.append("") - super().append(item) + output = self._define_output(in_place) + output._data.append(value) + if output._names is not None: + output._names.append("") + return output + + def append(self, value: Any): + """Alias for :py:attr:`~safe_append` with `in_place = True`.""" + self.safe_append(value, in_place=True) - def extend(self, iterable: Iterable): + def safe_extend(self, other: Iterable, in_place: bool = False): """ Args: iterable: @@ -204,42 +394,38 @@ def extend(self, iterable: Iterable): used to extend the names of the current object; otherwise the extended names are set to empty strings. + in_place: + Whether to perform the modification in place. + Returns: - Items in ``iterable`` are added to the end of the current object. + A ``NamedList`` where items in ``iterable`` are added to the end. 
+ If ``in_place = False``, this is a new object, otherwise a reference + to the current object is returned. """ - super().extend(iterable) - if isinstance(iterable, NamedList): - self._names.extend(iterable._names) - elif len(iterable): - self._names.extend([""] * len(iterable)) + output = self._define_output(in_place) + previous_len = len(output) + output._data.extend(other) - def __add__(self, other: list) -> "NamedList": - """ - Args: - other: - A list of items to be added to the right of the current object. + if isinstance(other, NamedList): + if output._names is None: + output._names = Names([""] * previous_len) + output._names.extend(other._names) + elif output._names is not None: + output._names.extend([""] * len(other)) - Returns: - A new ``NamedList`` containing the concatenation of the - current object's items and those of ``other``. - """ - output = self.copy() - output.extend(other) return output - def __iadd__(self, other: list): - """ - Extend an existing ``NamedList`` with a new list. + def extend(self, other: Iterable): + """Alias for :py:attr:`~safe_extend` with ``in_place = True``.""" + self.safe_extend(other, in_place=True) - Args: - other: - A list of items. + def __add__(self, other: list) -> "NamedList": + """Alias for :py:attr:`~safe_extend`.""" + return self.safe_extend(other) - Returns: - The current object is extended with the contents of ``other``. If - ``other`` is a ``NamedList``, its names are used for extension; - otherwise the extension is performed with empty strings. - """ + def __iadd__(self, other: list): + """Alias for :py:attr:`~extend`, returning a reference to the current + object after the in-place modification.""" self.extend(other) return self @@ -248,7 +434,7 @@ def copy(self) -> "NamedList": Returns: A shallow copy of a ``NamedList`` with the same contents. 
""" - return NamedList(self, names=self._names.copy()) + return type(self)(self._data, names=self._names, _validate=False) def __deepcopy__(self, memo=None, _nil=[]) -> "NamedList": """ @@ -262,9 +448,9 @@ def __deepcopy__(self, memo=None, _nil=[]) -> "NamedList": Returns: A deep copy of a ``NamedList`` with the same contents. """ - return NamedList(deepcopy(self, memo, _nil), names=deepcopy(self_names, memo, _nil)) + return type(self)(deepcopy(self._data, memo, _nil), names=deepcopy(self._names, memo, _nil), _validate=False) - def as_dict(self) -> dict[str, Any]: + def as_dict(self) -> Dict[str, Any]: """ Returns: A dictionary where the keys are the names and the values are the @@ -276,23 +462,32 @@ def as_dict(self) -> dict[str, Any]: output[n] = self[i] return output + @staticmethod + def from_dict(x: dict) -> "NamedList": + return NamedList(list(x.values()), names=Names(str(y) for y in x.keys())) + @subset_sequence.register def _subset_sequence_NamedList(x: NamedList, indices: Sequence[int]) -> NamedList: - return NamedList((x[i] for i in indices), names=subset_sequence(x._names, indices)) + return x.get_slice(indices) @combine_sequences.register def _combine_sequences_NamedList(*x: NamedList) -> NamedList: - output = x[0].copy() + output = x[0]._define_output(in_place=False) for i in range(1, len(x)): output.extend(x[i]) return output @assign_sequence.register -def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other) -> NamedList: - output = assign_sequence.registry[list](x, indices, other) +def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other: Sequence) -> NamedList: if isinstance(other, NamedList): - output._names = assign_sequence(output._names, indices, other._names) - return output + # Do NOT set the names if 'other' is a NamedList. Names don't change + # during assignment/setting operations, as a matter of policy. 
This is + # for simplicity, efficiency (as the Names don't need to be reindexed) + # but mainly because 'indices' could have been derived from a sequence + # of names, and it would be weird for the same sequence of names to + # suddenly become an invalid indexing vector after an assignment. + other = other._data + return type(x)(assign_sequence(x._data, indices, other), names=x._names) diff --git a/src/biocutils/StringList.py b/src/biocutils/StringList.py index a3f5777..2d3098e 100644 --- a/src/biocutils/StringList.py +++ b/src/biocutils/StringList.py @@ -1,169 +1,74 @@ -from typing import Any, Union, Optional +from typing import Any, Union, Optional, Sequence, Iterable from collections.abc import Iterable +from .Names import Names +from .NamedList import NamedList +from .subset_sequence import subset_sequence +from .combine_sequences import combine_sequences +from .assign_sequence import assign_sequence + def _coerce_to_str(x: Any) -> bool: return None if x is None else str(x) -class StringList(list): +class _SubscriptCoercer: + def __init__(self, data): + self._data = data + def __getitem__(self, index): + return _coerce_to_str(self._data[index]) + + +class StringList(NamedList): """ - Python list of strings. This is the same as a regular Python list except - that anything added to it will be coerced into a string. None values are - also acceptable and are treated as missing strings. + List of strings. This mimics a regular Python list except that anything + added to it will be coerced into a string. None values are also acceptable + and are treated as missing strings. The list may also be named (see + :py:class:`~NamedList`), which provides some dictionary-like functionality.
""" - def __init__(self, iterable: Optional[Iterable] = None, coerce: bool = True): + def __init__(self, data: Optional[Iterable] = None, names: Optional[Names] = None, _validate: bool = True): """ Args: - iterable: + data: Some iterable object where all values can be coerced to strings or are None. Alternatively this may itself be None, which defaults to an empty list. - coerce: - Whether to perform the coercion to strings. This can be skipped - if it is known that ``iterable`` only contains strings or None. - """ - if iterable is not None: - new_it = iterable - if coerce and not isinstance(iterable, type(self)): - new_it = (_coerce_to_str(item) for item in iterable) - super().__init__(new_it) - else: - super().__init__() - - def __getitem__(self, index: Union[int, slice]) -> Union[str, "StringList"]: - """ - Obtain one or more elements from a ``StringList``. - - Args: - index: - An integer index containing a position to extract, or a slice - specifying multiple positions to extract. - - Returns: - If ``index`` is an integer, a string or None is returned at the - specified position. - - If ``index`` is a slice, a new ``StringList`` is returned - containing the items at the specified positions. - """ - output = super().__getitem__(index) - if isinstance(index, slice): - return StringList(output, coerce=False) - return output - - def __setitem__(self, index: Union[int, slice], item: Any): - """ - Set one or more items in the ``StringList``. - - Args: - index: - An integer index containing a position to set, or a slice - specifying multiple positions to set. - - item: - If ``index`` is an integer, a scalar that can be coerced into a - string, or None. - - If ``index`` is a slice, an iterable of the same length - containing values that can be coerced to strings or None. - - Returns: - In the current object, the specified item(s) at ``index`` are - replaced with the contents of ``item``. 
- """ - if isinstance(index, slice): - new_it = item - if not isinstance(item, type(self)): - new_it = (_coerce_to_str(x) for x in item) - super().__setitem__(index, new_it) - else: - super().__setitem__(index, _coerce_to_str(item)) - - def insert(self, index: int, item: Any): - """ - Insert an item in the ``StringList``. - - Args: - index: - An integer index containing a position to insert at. - - item: - A scalar that can be coerced into a string, or None. - - Returns: - ``item`` is inserted at ``index`` in the current object. - """ - super().insert(index, _coerce_to_str(item)) - - def append(self, item: Any): - """ - Append an item to the end of a ``StringList``. - - Args: - item: - A scalar that can be coerced into a string, or None. - - Returns: - ``item`` is added to the end of the current object. - """ - super().append(_coerce_to_str(item)) - - def extend(self, iterable: Iterable): - """ - Extend the end of a ``StringList`` with more items. - - Args: - iterable: - Some iterable object where all values can be coerced to strings - or are None. - - Returns: - Items in ``iterable`` are added to the end of the current object. - """ - new_it = iterable - if not isinstance(iterable, type(self)): - new_it = (_coerce_to_str(item) for item in iterable) - super().extend(new_it) - - def __add__(self, other: list) -> "StringList": - """ - Add a list to the right of a ``StringList``. - - Args: - other: - A list of items that can be coerced to strings or are None. - - Returns: - A new ``StringList`` containing the concatenation of the - current object's items and those of ``other``. - """ - output = self.copy() - output.extend(other) - return output - - def __iadd__(self, other: list): - """ - Extend an existing ``StringList`` with a new list. - - Args: - other: - A list of items that can be coerced to strings or are None. - - Returns: - The current object is extended with the contents of ``other``. 
- """ - self.extend(other) - return self - - def copy(self) -> "StringList": - """ - Make a copy of a ``StringList``. - - Returns: - A new ``StringList`` with the same contents. - """ - return StringList(self, coerce=False) + names: + Names for the list elements, defaults to an empty list. + + _validate: + Internal use only. + """ + if _validate: + if data is not None: + if isinstance(data, StringList): + data = data._data + else: + if isinstance(data, NamedList): + data = data._data + original = data + data = list(_coerce_to_str(item) for item in original) + super().__init__(data, names, _validate=_validate) + + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> "StringList": + """Calls :py:meth:`~NamedList.NamedList.set_value` after coercing ``value`` to a string.""" + return super().set_value(index, _coerce_to_str(value), in_place=in_place) + + def set_slice(self, index: Union[int, str, slice], value: Sequence, in_place: bool = False) -> "StringList": + """Calls :py:meth:`~NamedList.NamedList.set_slice` after coercing ``value`` to strings.""" + return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) + + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> "StringList": + """Calls :py:meth:`~NamedList.NamedList.safe_insert` after coercing ``value`` to a string.""" + return super().safe_insert(index, _coerce_to_str(value), in_place=in_place) + + def safe_append(self, value: Any, in_place: bool = False) -> "StringList": + """Calls :py:meth:`~NamedList.NamedList.safe_append` after coercing ``value`` to a string.""" + return super().safe_append(_coerce_to_str(value), in_place=in_place) + + def safe_extend(self, other: Iterable, in_place: bool = True) -> "StringList": + """Calls :py:meth:`~NamedList.NamedList.safe_extend` after coercing elements of ``other`` to strings.""" + return super().safe_extend((_coerce_to_str(y) for y in other), in_place=in_place) diff --git 
a/tests/test_Factor.py b/tests/test_Factor.py index a2b3543..bf8421d 100644 --- a/tests/test_Factor.py +++ b/tests/test_Factor.py @@ -9,7 +9,7 @@ def test_Factor_basics(): assert len(f) == 6 assert list(f) == ["A", "B", "C", "A", "C", "E"] assert list(f.get_codes()) == [0, 1, 2, 0, 2, 4] - assert f.get_levels() == ["A", "B", "C", "D", "E"] + assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"] assert not f.get_ordered() # Works with missing values. @@ -83,48 +83,48 @@ def test_Factor_setitem(): f[0:2] = f2[2:4] assert list(f.get_codes()) == [2, 3, 2, 3, 2, 1] - assert f.get_levels() == ["A", "B", "C", "D", "E"] + assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"] f = Factor([0, 1, 2, 3, 2, 1], levels=["A", "B", "C", "D", "E"]) f2 = Factor([0, 1, 2, 3, 2, 1], levels=["E", "D", "C", "B", "A"]) f[[-3, -2, -1]] = f2[0:3] assert list(f.get_codes()) == [0, 1, 2, 4, 3, 2] - assert f.get_levels() == ["A", "B", "C", "D", "E"] + assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"] f = Factor([0, 1, 2, 3, 2, 1], levels=["A", "B", "C", "D", "E"]) f2 = Factor([0, 1, 2, 3, 2, 1], levels=["e", "d", "c", "b", "a"]) f[:] = f2[:] assert list(f.get_codes()) == [-1] * 6 - assert f.get_levels() == ["A", "B", "C", "D", "E"] + assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"] def test_Factor_drop_unused_levels(): f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"]) f2 = f.drop_unused_levels() - assert f2.get_levels() == ["A", "B", "C", "E"] + assert f2.get_levels().get_data() == ["A", "B", "C", "E"] assert list(f2) == list(f) f = Factor([3, 4, 2, 3, 2, 4], levels=["A", "B", "C", "D", "E"]) f2 = f.drop_unused_levels(in_place=True) - assert f2.get_levels() == ["C", "D", "E"] + assert f2.get_levels().get_data() == ["C", "D", "E"] assert list(f2) == ["D", "E", "C", "D", "C", "E"] def test_Factor_set_levels(): f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"]) f2 = f.set_levels(["E", "D", "C", "B", "A"]) - assert 
f2.get_levels() == ["E", "D", "C", "B", "A"] + assert f2.get_levels().get_data() == ["E", "D", "C", "B", "A"] assert list(f2.get_codes()) == [4, 3, 2, 4, 2, 0] assert list(f2) == list(f) f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"]) f2 = f.set_levels(["E", "C", "A"], in_place=True) - assert f2.get_levels() == ["E", "C", "A"] + assert f2.get_levels().get_data() == ["E", "C", "A"] assert list(f2.get_codes()) == [2, -1, 1, 2, 1, 0] f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"]) f2 = f.set_levels("E") # reorders - assert f2.get_levels() == ["E", "A", "B", "C", "D"] + assert f2.get_levels().get_data() == ["E", "A", "B", "C", "D"] assert list(f2.get_codes()) == [1, 2, 3, 1, 3, 0] with pytest.raises(ValueError) as ex: @@ -164,7 +164,7 @@ def test_Factor_combine(): f1 = Factor([0, 2, 4, 2, 0], levels=["A", "B", "C", "D", "E"]) f2 = Factor([1, 3, 1], levels=["D", "E", "F", "G"]) out = combine(f1, f2) - assert out.get_levels() == ["A", "B", "C", "D", "E", "F", "G"] + assert out.get_levels().get_data() == ["A", "B", "C", "D", "E", "F", "G"] assert list(out.get_codes()) == [0, 2, 4, 2, 0, 4, 6, 4] f2 = Factor([1, 3, None], levels=["D", "E", "F", "G"]) diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index e007ce7..c16b394 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -1,134 +1,237 @@ import biocutils import pytest from biocutils import NamedList +from copy import deepcopy -def test_NamedList_basics(): +def test_NamedList_init(): x = NamedList([1,2,3,4], names=['a', 'b', 'c', 'd']) assert isinstance(x, NamedList) - assert x == [ 1,2,3,4 ] + assert x.get_data() == [ 1,2,3,4 ] assert x.get_names() == ["a", "b", "c", "d"] + assert len(x) == 4 - assert x["a"] == 1 - assert x["b"] == 2 + y = NamedList(x) + assert y.get_data() == [1,2,3,4] + assert y.get_names() is None # names are not carried over; this is intended, and not a bug. 
+ + empty = NamedList() + assert empty.get_data() == [] + assert empty.get_names() is None + assert len(empty) == 0 + + x = NamedList([1,2,3,4]) + assert x.get_data() == [1,2,3,4] + assert x.get_names() is None + + +def test_NamedList_get_value(): + x = NamedList([1,2,3,4]) + assert x.get_value(0) == 1 + assert x.get_value(-1) == 4 with pytest.raises(KeyError) as ex: - x["Aaron"] + x.get_value("Aaron") assert str(ex.value).find("Aaron") >= 0 - # Constructor works with other NamedList objects. - y = NamedList(x) - assert y == x - assert y.get_names() == ["a", "b", "c", "d"] + x.set_names(["a", "b", "c", "d"], in_place=True) + assert x.get_value("a") == 1 + assert x.get_value("b") == 2 + with pytest.raises(KeyError) as ex: + x.get_value("Aaron") + assert str(ex.value).find("Aaron") >= 0 - empty = NamedList() - assert empty == [] - assert isinstance(empty, NamedList) - assert empty.get_names() == [] - # Slicing works correctly. - sub = x[1:3] - assert isinstance(sub, NamedList) - assert sub == [2, 3] - assert sub.get_names() == ["b", "c"] +def test_NamedList_get_slice(): + x = NamedList([1,2,3,4]) + + sub = x.get_slice([0, 2]) + assert sub.get_data() == [1, 3] + assert sub.get_names() is None + + sub = x.get_slice([False, True, True, False]) + assert sub.get_data() == [2, 3] + assert sub.get_names() is None + + with pytest.raises(Exception) as ex: + x.get_slice(["Aaron", "Foo"]) + assert str(ex.value).find("no names") >= 0 - # Copying works. 
- z = x.copy() - z[0] = "Aaron" - assert z == [ "Aaron", 2, 3, 4 ] - assert x == [ 1, 2, 3, 4 ] - assert z.get_names() == [ "a", "b", "c", "d" ] + x.set_names(["a", "b", "c", "d"], in_place=True) + sub = x.get_slice([0, 2]) + assert sub.get_data() == [1, 3] + assert sub.get_names() == ["a", "c"] + + sub = x.get_slice(["a", "d"]) + assert sub.get_data() == [1, 4] + assert sub.get_names() == ["a", "d"] + +# with pytest.raises(Exception) as ex: +# x.get_slice(["Aaron"]) +# assert str(ex.value).find("Aaron") >= 0 + + +def test_NamedList_get_item(): + x = NamedList([1,2,3,4], names=["a", "b", "c", "d"]) + assert x[0] == 1 + assert x["b"] == 2 + assert x[[0, 1]].get_data() == [1,2] + assert x[["b","d"]].get_data() == [2,4] def test_NamedList_dict(): x = NamedList([1,2,3,4], names=['a', 'b', 'c', 'd']) assert x.as_dict() == { "a": 1, "b": 2, "c": 3, "d": 4 } - x = NamedList({ "c": 4, "d": 5, 23: 99 }) + x = NamedList.from_dict({ "c": 4, "d": 5, 23: 99 }) assert x.get_names() == [ "c", "d", "23" ] - assert x == [ 4, 5, 99 ] + assert x.get_data() == [ 4, 5, 99 ] + + +def test_NamedList_set_value(): + x = NamedList([1,2,3,4]) + y = x.set_value(0, 10) + assert y.get_data() == [10, 2, 3, 4] + y = x.set_value(-1, 40) + assert y.get_data() == [1, 2, 3, 40] + + y = x.set_value("Aaron", 10) + assert y.get_data() == [1, 2, 3, 4, 10] + assert y.get_names() == ["", "", "", "", "Aaron"] + + x.set_names(["a", "b", "c", "d"], in_place=True) + y = x.set_value("a", 10) + assert y.get_data() == [10, 2, 3, 4] + y = x.set_value("d", 40) + assert y.get_data() == [1, 2, 3, 40] + y = x.set_value("Aaron", 10) + assert y.get_data() == [1, 2, 3, 4, 10] + assert y.get_names() == ["a", "b", "c", "d", "Aaron"] + + +def test_NamedList_set_slice(): + x = NamedList([1,2,3,4]) + y = x.set_slice([0, 3], [10, 40]) + assert y.get_data() == [10, 2, 3, 40] + y = x.set_slice([False, True, True, False], [20, 30]) + assert y.get_data() == [1, 20, 30, 4] + with pytest.raises(IndexError) as ex: + 
x.set_slice(["Aaron"], [10]) + assert str(ex.value).find("no names") >= 0 + + x.set_names(["a", "b", "c", "d"], in_place=True) + y = x.set_slice(["a", "d"], [10, 40]) + assert y.get_data() == [10, 2, 3, 40] +# with pytest.raises(KeyError) as ex: +# y = x.set_slice(["Aaron"], [10]) +# assert str(ex.value).find("Aaron") >= 0 def test_NamedList_setitem(): x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) x[0] = None - assert x == [None, 2, 3, 4] - assert x["A"] == None - - # Replacing by name. - x["B"] = "FOO" - assert x[1] == "FOO" + assert x.get_data() == [None, 2, 3, 4] + x["B"] = None + assert x.get_data() == [None, None, 3, 4] + x[["C", "D"]] = [30, 40] + assert x.get_data() == [None, None, 30, 40] + x["E"] = "FOO" + assert x.get_data() == [None, None, 30, 40, "FOO"] + assert x.get_names() == ["A", "B", "C", "D", "E"] + + +def test_NamedList_insert(): + x = NamedList([1,2,3,4]) + y = x.safe_insert(2, "FOO") + assert y.get_data() == [1, 2, "FOO", 3, 4] + assert y.get_names() is None + + x.set_names(["A", "B", "C", "D"], in_place=True) + x.insert(2, "FOO") + assert x.get_data() == [1, 2, "FOO", 3, 4] + assert x.get_names() == ["A", "B", "", "C", "D"] - # Replacing slices. - x[1:3] = [10, 20] - assert x == [None, 10, 20, 4] - x[1:3] = NamedList([4,5], names=["YAY", "BAR"]) - assert x == [None, 4, 5, 4] - assert x.get_names() == [ "A", "YAY", "BAR", "D" ] + x.insert("D", None) + assert x.get_data() == [1, 2, "FOO", 3, None, 4] + assert x.get_names() == [ "A", "B", "", "C", "", "D"] - # Appending by name. 
- x["Aaron"] = "BAR" - assert x["Aaron"] == "BAR" +def test_NamedList_extend(): + x = NamedList([1,2,3,4]) + y = x.safe_extend([None, 1, True]) + assert y.get_data() == [ 1, 2, 3, 4, None, 1, True ] + assert y.get_names() is None -def test_NamedList_mutations(): - # Insertion: - x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) - x.insert(2, "FOO") - assert x == [1, 2, "FOO", 3, 4] - assert x.get_names() == [ "A", "B", "", "C", "D"] - x.insert("D", None) - assert x == [1, 2, "FOO", 3, None, 4] - assert x.get_names() == [ "A", "B", "", "C", "", "D"] + y = x.safe_extend(NamedList([False, 2, None], names=[ "E", "F", "G" ])) + assert y.get_data() == [ 1, 2, 3, 4, False, 2, None ] + assert y.get_names() == [ "", "", "", "", "E", "F", "G" ] - # Extension: - x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) + x.set_names(["A", "B", "C", "D"], in_place=True) x.extend([None, 1, True]) - assert x == [ 1, 2, 3, 4, None, 1, True ] + assert x.get_data() == [ 1, 2, 3, 4, None, 1, True ] assert x.get_names() == [ "A", "B", "C", "D", "", "", "" ] + x.extend(NamedList([False, 2, None], names=[ "E", "F", "G" ])) - assert x == [ 1, 2, 3, 4, None, 1, True, False, 2, None ] + assert x.get_data() == [ 1, 2, 3, 4, None, 1, True, False, 2, None ] assert x.get_names() == [ "A", "B", "C", "D", "", "", "", "E", "F", "G" ] - # Appending: - x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) + +def test_NamedList_append(): + x = NamedList([1,2,3,4]) + y = x.safe_append(1) + assert y.get_data() == [ 1,2,3,4,1 ] + assert y.get_names() is None + + x.set_names(["A", "B", "C", "D"], in_place=True) x.append(1) - assert x == [ 1,2,3,4,1 ] + assert x.get_data() == [ 1,2,3,4,1 ] assert x.get_names() == [ "A", "B", "C", "D", "" ] def test_NamedList_addition(): x1 = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) summed = x1 + [5,6,7] - assert summed == [1, 2, 3, 4, 5, 6, 7] + assert summed.get_data() == [1, 2, 3, 4, 5, 6, 7] assert summed.get_names() == [ "A", "B", "C", "D", "", "", "" ] x2 = 
NamedList([5,6,7], names=["E", "F", "G"]) summed = x1 + x2 - assert summed == [1, 2, 3, 4, 5, 6, 7] + assert summed.get_data() == [1, 2, 3, 4, 5, 6, 7] assert summed.get_names() == ["A", "B", "C", "D", "E", "F", "G"] x1 += x2 - assert x1 == [1, 2, 3, 4, 5, 6, 7] + assert x1.get_data() == [1, 2, 3, 4, 5, 6, 7] assert x1.get_names() == ["A", "B", "C", "D", "E", "F", "G"] +def test_NamedList_copy(): + x = NamedList([1,2,3,4]) + y = x.copy() + assert y.get_data() == x.get_data() + assert y.get_names() is None + + x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) + y = deepcopy(x) + assert y.get_data() == x.get_data() + assert y.get_names() == x.get_names() + + def test_NamedList_generics(): x = NamedList([1,2,3,4], names=["A", "B", "C", "D"]) sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, NamedList) - assert sub == [1, 4, 3, 2] + assert sub.get_data() == [1, 4, 3, 2] assert sub.get_names() == [ "A", "D", "C", "B" ] y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert isinstance(com, NamedList) - assert com == [1, 2, 3, 4, "a", "b", "c", "d"] + assert com.get_data() == [1, 2, 3, 4, "a", "b", "c", "d"] assert com.get_names() == [ "A", "B", "C", "D", "", "", "", "" ] y = biocutils.assign_sequence(x, [1, 3], [ 20, 40 ]) - assert y == [ 1, 20, 3, 40 ] + assert y.get_data() == [ 1, 20, 3, 40 ] assert y.get_names() == [ "A", "B", "C", "D" ] y = biocutils.assign_sequence(x, [1, 3], NamedList([ 20, 40 ], names=["b", "d" ])) - assert y == [ 1, 20, 3, 40 ] - assert y.get_names() == [ "A", "b", "C", "d" ] + assert y.get_data() == [ 1, 20, 3, 40 ] + assert y.get_names() == [ "A", "B", "C", "D" ] # doesn't set the names, as per policy. 
diff --git a/tests/test_StringList.py b/tests/test_StringList.py index 2754afc..00fcde8 100644 --- a/tests/test_StringList.py +++ b/tests/test_StringList.py @@ -1,52 +1,66 @@ import biocutils -from biocutils import StringList +from biocutils import StringList, NamedList -def test_StringList_basics(): +def test_StringList_init(): x = StringList([1,2,3,4]) assert isinstance(x, StringList) - assert x == [ '1', '2', '3', '4' ] - assert x[0] == "1" + assert x.get_data() == [ '1', '2', '3', '4' ] + assert x.get_names() is None # Constructor works with other StringList objects. - assert StringList(x) == x + recon = StringList(x) + assert recon.get_data() == x.get_data() empty = StringList() - assert empty == [] - assert isinstance(empty, StringList) - - # Slicing works correctly. - sub = x[1:3] - assert isinstance(sub, StringList) - assert sub == ["2", "3"] + assert empty.get_data() == [] # Constructor works with Nones. x = StringList([1,None,None,4]) - assert x == [ '1', None, None, '4' ] + assert x.get_data() == [ '1', None, None, '4' ] + + # Constructor works with other NamedList objects. + x = NamedList([True, False, None, 2]) + recon = StringList(x) + assert recon.get_data() == ["True", "False", None, "2"] + + +def test_StringList_getitem(): + x = StringList([1,2,3,4]) + + assert x[0] == "1" + sub = x[1:3] + assert isinstance(sub, StringList) + assert sub.get_data() == ["2", "3"] - # Copying works. 
- z = x.copy() - z[0] = "Aaron" - assert z == [ "Aaron", None, None, "4" ] - assert x == [ "1", None, None, "4" ] + x.set_names(["A", "B", "C", "D"], in_place=True) + assert x["C"] == "3" + sub = x[["C", "D", "A", "B"]] + assert isinstance(sub, StringList) + assert sub.get_data() == ["3", "4", "1", "2"] def test_StringList_setitem(): x = StringList([1,2,3,4]) x[0] = None - assert x == [None, "2", "3", "4"] + assert x.get_data() == [None, "2", "3", "4"] x[0] = 12345 - assert x == ["12345", "2", "3", "4"] + assert x.get_data() == ["12345", "2", "3", "4"] x[1:3] = [10, 20] - assert x == ["12345", "10", "20", "4"] + assert x.get_data() == ["12345", "10", "20", "4"] x[0:4:2] = [None, None] - assert x == [None, "10", None, "4"] + assert x.get_data() == [None, "10", None, "4"] - alt = StringList([ "YAY", "FOO", "BAR", "WHEE" ]) - x[:] = alt - assert x == alt + x.set_names(["A", "B", "C", "D"], in_place=True) + x["C"] = "3" + assert x.get_data() == [None, "10", "3", "4"] + x[["A", "B"]] = [True, False] + assert x.get_data() == ["True", "False", "3", "4"] + x["E"] = 50 + assert x.get_data() == ["True", "False", "3", "4", "50"] + assert x.get_names() == [ "A", "B", "C", "D", "E" ] def test_StringList_mutations(): @@ -54,14 +68,14 @@ def test_StringList_mutations(): x = StringList([1,2,3,4]) x.insert(2, None) x.insert(1, "FOO") - assert x == [ "1", "FOO", "2", None, "3", "4" ] + assert x.get_data() == [ "1", "FOO", "2", None, "3", "4" ] # Extension: x.extend([None, 1, True]) - assert x == [ "1", "FOO", "2", None, "3", "4", None, "1", "True" ] + assert x.get_data() == [ "1", "FOO", "2", None, "3", "4", None, "1", "True" ] alt = StringList([ "YAY", "BAR", "WHEE" ]) x.extend(alt) - assert x == [ "1", "FOO", "2", None, "3", "4", None, "1", "True", "YAY", "BAR", "WHEE" ] + assert x.get_data() == [ "1", "FOO", "2", None, "3", "4", None, "1", "True", "YAY", "BAR", "WHEE" ] # Appending: x.append(1) @@ -70,24 +84,17 @@ def test_StringList_mutations(): assert x[-1] == None -def 
test_StringList_addition(): - x1 = StringList([1,2,3,4]) - assert x1 + [5,6,7] == ["1", "2", "3", "4", "5", "6", "7"] - - x2 = StringList([5,6,7]) - assert x1 + x2 == ["1", "2", "3", "4", "5", "6", "7"] - - x1 += x2 - assert x1 == ["1", "2", "3", "4", "5", "6", "7"] - - def test_StringList_generics(): x = StringList([1,2,3,4]) sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, StringList) - assert sub == ["1", "4", "3", "2"] + assert sub.get_data() == ["1", "4", "3", "2"] y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert isinstance(com, StringList) - assert com == ["1", "2", "3", "4", "a", "b", "c", "d"] + assert com.get_data() == ["1", "2", "3", "4", "a", "b", "c", "d"] + + ass = biocutils.assign_sequence(x, [1,3], ["a", "b"]) + assert isinstance(ass, StringList) + assert ass.get_data() == ["1", "a", "3", "b"]