From aedb9a4943843ace5cd23ac6814e689aac5c1eb6 Mon Sep 17 00:00:00 2001
From: Max Hargreaves <hargreaves.max@gene.com>
Date: Tue, 19 Sep 2023 16:57:42 -0700
Subject: [PATCH 1/6] Change: progress

---
 pyproject.toml                  |   5 +
 setup.cfg                       |   4 +-
 src/biocframe/BiocFrame.py      | 439 ++++++++++++++++++--------------
 src/biocframe/__init__.py       |  24 +-
 src/biocframe/_type_checks.py   |  11 +-
 src/biocframe/_validators.py    |  98 ++++---
 src/biocframe/io/from_pandas.py |   7 +-
 src/biocframe/types.py          |  41 ++-
 src/biocframe/utils.py          |  54 ++--
 tests/test_utils.py             |   6 +-
 10 files changed, 396 insertions(+), 293 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a7cea75..5980f62 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,9 @@ line-length = 120
 src = ["src"]
 exclude = ["tests"]
 extend-ignore = ["F821"]
+select = ["E", "F", "I", "D", "PLC", "A", "RUF"]
+ignore = ["E501", "D203", "D213", "A003"]
+unfixable = ["F401", "F841"]
 
 [tool.ruff.pydocstyle]
 convention = "google"
@@ -19,5 +22,7 @@ convention = "google"
 [tool.ruff.per-file-ignores]
 "__init__.py" = ["E402", "F401"]
 
+"**/__init__.py" = ["PLC0414"]
+
 [tool.black]
 force-exclude = "__init__.py"
diff --git a/setup.cfg b/setup.cfg
index b38a996..b8fa774 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ package_dir =
     =src
 
 # Require a min/specific Python version (comma-separated conditions)
-# python_requires = >=3.8
+python_requires = >=3.8
 
 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
@@ -50,6 +50,7 @@ package_dir =
 install_requires =
     importlib-metadata; python_version<"3.8"
     prettytable
+    pandas
 
 [options.packages.find]
 where = src
@@ -68,7 +69,6 @@ testing =
     pytest
     pytest-cov
     numpy
-    pandas
 
 [options.entry_points]
 # Add here console scripts like:
diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 2b01302..79ffa9f 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -1,18 +1,43 @@
-from collections import OrderedDict
-from typing import List, MutableMapping, Optional, Sequence, Tuple, Union
-
-from pandas.api.types import is_numeric_dtype
+"""A Bioconductor-like data frame."""
+
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+    overload,
+)
+
+from pandas.api.types import is_numeric_dtype  # type: ignore
 from prettytable import PrettyTable
 
 from ._type_checks import is_list_of_type
 from ._validators import validate_cols, validate_rows, validate_unique_list
-from .types import SlicerArgTypes, SlicerTypes
-from .utils import _match_to_indices, _slice_or_index
+from .types import (
+    AllSlice,
+    AtomicSlice,
+    BiocSeq,
+    ColType,
+    DataType,
+    RangeSlice,
+    SimpleSlice,
+)
+from .utils import match_to_indices, slice_or_index
+
+try:
+    from pandas import DataFrame, RangeIndex
+except ImportError:  # best-effort import; these names are used by `from_pandas`
+    pass
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
+ItemType = Union["BiocFrame", ColType]
+
 
 class BiocFrameIter:
     """An iterator to a :py:class:`~biocframe.BiocFrame.BiocFrame` object.
@@ -25,12 +50,12 @@ def __init__(self, obj: "BiocFrame") -> None:
         self._bframe = obj
         self._current_index = 0
 
-    def __iter__(self):
+    def __iter__(self) -> "BiocFrameIter":
         return self
 
     def __next__(self):
         if self._current_index < len(self._bframe):
-            iter_row_index = (
+            iter_row_index: Optional[str] = (
                 self._bframe.row_names[self._current_index]
                 if self._bframe.row_names is not None
                 else None
@@ -46,7 +71,7 @@ def __next__(self):
 class BiocFrame:
     """`BiocFrame` is an alternative to :class:`~pandas.DataFrame`.
 
-    Columns may extend :class:`~collections.abc.Sequence`,
+    Columns may extend :class:`~collections.abc.List`,
     and must implement the length (``__len__``) and slice (``__getitem__``) dunder
     methods. This allows :py:class:`~biocframe.BiocFrame.BiocFrame` to accept nested
     `BiocFrame` objects as columns.
@@ -86,59 +111,67 @@ class BiocFrame:
     .. code-block:: python
 
         sliced_bframe = bframe[1:2, [True, False, False]]
-
-    Attributes:
-        data (MutableMapping[str, Union[Sequence, MutableMapping]], optional):
-            Dictionary of column names as `keys` and their values. all columns must have
-            the same length. Defaults to None.
-        number_of_rows (int, optional): Number of rows. Defaults to None.
-        row_names (Sequence, optional): Row index names. Defaults to None.
-        column_names (Sequence[str], optional): Column names, if not provided,
-            is automatically inferred from data. Defaults to None.
-        metadata (MutableMapping, optional): Additional metadata. Defaults to None.
-
-    Raises:
-        ValueError: if rows or columns mismatch from data.
     """
 
     def __init__(
         self,
-        data: Optional[MutableMapping[str, Union[Sequence, MutableMapping]]] = None,
+        data: Optional[DataType] = None,
         number_of_rows: Optional[int] = None,
-        row_names: Optional[Sequence[str]] = None,
-        column_names: Optional[Sequence[str]] = None,
-        metadata: Optional[MutableMapping] = None,
+        row_names: Optional[List[str]] = None,
+        column_names: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> None:
-        self._number_of_rows = number_of_rows
+        """Initialize a `BiocFrame` object.
+
+        Args:
+            data (Dict[str, Union[List, Dict, BioSeq]], optional):
+                Dictionary of column names as `keys` and their values. all columns must have
+                the same length. Defaults to None.
+            number_of_rows (int, optional): Number of rows. Defaults to None.
+            row_names (List, optional): Row index names. Defaults to None.
+            column_names (List[str], optional): Column names, if not provided,
+                is automatically inferred from data keys. Defaults to None.
+            metadata (Dict, optional): Additional metadata. Defaults to None.
+
+        Raises:
+            ValueError: if rows or columns mismatch from data.
+        """
+        self._data: DataType = {} if data is None else data
         self._row_names = row_names
-        self._data = {} if data is None else data
-        self._column_names = column_names
+        self._number_of_rows = validate_rows(
+            self._data, number_of_rows, self._row_names
+        )
+        self._column_names, self._data = validate_cols(
+            column_names, self._data
+        )
+        self._number_of_columns = len(self._column_names)
         self._metadata = metadata
 
-        self._validate()
+    @classmethod
+    def from_pandas(cls, df: "DataFrame") -> "BiocFrame":
+        """Read a :py:class:`~biocframe.BiocFrame.BiocFrame` from :py:class:`~pandas.DataFrame` object.
 
-    def _validate(self):
-        """Internal method to validate the object.
+        Args:
+            df (:py:class:`~pandas.DataFrame`): Input data.
 
         Raises:
-            ValueError: When all columns does not contain the
-                same number of rows.
-            ValueError: When row index is not unique.
-        """
+            TypeError: If ``input`` is not a :py:class:`~pandas.DataFrame`.
 
-        self._number_of_rows = validate_rows(
-            self._data, self._number_of_rows, self._row_names
-        )
-        self._column_names, self._data = validate_cols(self._column_names, self._data)
+        Returns:
+            BiocFrame: A :py:class:`~biocframe.BiocFrame.BiocFrame` object.
+        """
+        r_data: Dict[str, List[Any]] = df.to_dict("list")  # type: ignore
+        r_index: Optional[List[str]] = None
 
-        self._number_of_columns = len(self._column_names)
+        if not isinstance(df.index, RangeIndex):  # type: ignore  # default index -> no row names
+            r_index = df.index.to_list()  # type: ignore
 
-        if self._number_of_rows is None:
-            self._number_of_rows = 0
+        return cls(data=r_data, row_names=r_index)
 
     def __repr__(self) -> str:
+        """Get a machine-readable string representation of the object."""
         table = PrettyTable(padding_width=1)
-        table.field_names = [str(col) for col in self.column_names]
+        table.field_names = [str(col) for col in self._column_names]
 
         _rows = []
         rows_to_show = 2
@@ -155,7 +188,7 @@ def __repr__(self) -> str:
 
         if self.shape[0] > 2 * rows_to_show:
             # add ...
-            _rows.append(["..." for _ in range(len(self.column_names))])
+            _rows.append(["..." for _ in range(len(self._column_names))])
 
         _last = self.shape[0] - _top
         if _last <= rows_to_show:
@@ -189,31 +222,31 @@ def shape(self) -> Tuple[int, int]:
         return (self._number_of_rows, self._number_of_columns)
 
     @property
-    def row_names(self) -> Optional[List]:
-        """Access row index (names).
+    def row_names(self) -> Optional[List[str]]:
+        """Get/set the row names.
 
-        Returns:
-            (List, optional): Row names if available, else None.
-        """
-        return self._row_names
-
-    @row_names.setter
-    def row_names(self, names: Optional[Sequence]):
-        """Set new row index. All values in ``names`` must be unique.
+        Set new row index. All values in ``names`` must be unique.
 
         Args:
-            names (Sequence, optional): A list of unique values.
+            names (List, optional): A list of unique values, or `None`. If
+            `None` row names are removed.
+
+        Returns:
+            (List, optional): Row names if available, else None.
 
         Raises:
             ValueError: Length of ``names`` does not match number of rows.
             ValueError: ``names`` is not unique.
         """
+        return self._row_names
 
+    @row_names.setter
+    def row_names(self, names: Optional[List[str]]) -> None:
         if names is not None:
             if len(names) != self.shape[0]:
                 raise ValueError(
-                    "Length of `names` does not match the number of rows, need to be "
-                    f"{self.shape[0]} but provided {len(names)}."
+                    "Length of `names` does not match the number of rows, "
+                    f"need to be {self.shape[0]} but provided {len(names)}."
                 )
 
             if not validate_unique_list(names):
@@ -222,58 +255,53 @@ def row_names(self, names: Optional[Sequence]):
         self._row_names = names
 
     @property
-    def data(self) -> MutableMapping[str, Union[Sequence, MutableMapping]]:
+    def data(self) -> DataType:
         """Access data as :py:class:`dict`.
 
         Returns:
-            MutableMapping[str, Union[Sequence, MutableMapping]]:
+            Dict[str, Union[List, Dict]]:
                 Dictionary of columns and their values.
         """
         return self._data
 
     @property
-    def column_names(self) -> list:
-        """Access column names.
+    def column_names(self) -> List[str]:
+        """Get/set the column_names.
+
+        Args:
+            names (List[str]): A list of unique values.
 
         Returns:
             list: A list of column names.
-        """
-        return self._column_names
-
-    @column_names.setter
-    def column_names(self, names: Sequence[str]):
-        """Set new column names. New names must be unique.
-
-        Args:
-            names (Sequence[str]): A list of unique values.
 
         Raises:
             ValueError: Length of ``names`` does not match number of columns.
             ValueError: ``names`` is not unique.
         """
+        return self._column_names
 
-        if names is None:
-            raise ValueError("`names` cannot be `None`!")
-
+    @column_names.setter
+    def column_names(self, names: List[str]) -> None:
         if len(names) != self._number_of_columns:
             raise ValueError(
-                "Length of `names` does not match number of columns, need to be "
-                f"{self._number_of_columns} but provided {len(names)}."
+                "Length of `names` does not match number of columns. Needs to "
+                f"be {self._number_of_columns} but provided {len(names)}."
             )
 
         if not (validate_unique_list(names)):
             raise ValueError("Column names must be unique!")
 
-        new_data = OrderedDict()
-        for idx in range(len(names)):
-            new_data[names[idx]] = self._data[self.column_names[idx]]
-
         self._column_names = names
-        self._data = new_data
+        self._data = {
+            names[i]: v for i, (_, v) in enumerate(self.data.items())
+        }
 
     @property
-    def metadata(self) -> Optional[dict]:
-        """Access metadata.
+    def metadata(self) -> Optional[Dict[str, Any]]:
+        """Get/set the metadata.
+
+        Args:
+            metadata (Dict, Optional): New metadata object.
 
         Returns:
             (dict, optional): Metadata if available.
@@ -281,18 +309,7 @@ def metadata(self) -> Optional[dict]:
         return self._metadata
 
     @metadata.setter
-    def metadata(self, metadata: Optional[MutableMapping]):
-        """Set new metadata.
-
-        Args:
-            metadata (MutableMapping, Optional): New metadata object.
-        """
-        if metadata is not None:
-            if not isinstance(metadata, dict):
-                raise TypeError(
-                    f"`metadata` must be a dictionary, provided {type(metadata)}"
-                )
-
+    def metadata(self, metadata: Optional[Dict[str, Any]]):
         self._metadata = metadata
 
     def has_column(self, name: str) -> bool:
@@ -306,53 +323,35 @@ def has_column(self, name: str) -> bool:
         """
         return name in self.column_names
 
-    def column(self, index_or_name: Union[str, int]) -> Union[Sequence, MutableMapping]:
-        """Access a column by integer position or column label.
-
-        Args:
-            index_or_name (Union[str, int]): Name of the column, must be present in
-                :py:attr:`~biocframe.BiocFrame.BiocFrame.column_names`.
-
-                Alternatively, you may provide the integer index of the column to
-                access.
-
-        Raises:
-            ValueError: if ``index_or_name`` is not in column names.
-            ValueError: if integer index is greater than number of columns.
-            TypeError: if ``index_or_name`` is neither a string nor an integer.
-
-        Returns:
-            Union[Sequence, MutableMapping]: Column with its original type preserved.
-        """
-
-        return self[:, index_or_name]
-
-    def row(self, index_or_name: Union[str, int]) -> dict:
-        """Access a row by integer position or row name.
-
-        Args:
-            index_or_name (Union[str, int]): Integer index of the row to access.
-
-                Alternatively, you may provide a string specifying the row to access,
-                only if :py:attr:`~biocframe.BiocFrame.BiocFrame.row_names` are
-                available.
-
-        Raises:
-            ValueError: if ``index_or_name`` not in row names.
-            ValueError: if integer index greater than number of rows.
-            TypeError: if ``index_or_name`` is neither a string nor an integer.
+    @overload
+    def _slice(
+        self,
+        row_indices_or_names: Optional[AtomicSlice],
+        column_indices_or_names: Optional[AtomicSlice],
+    ) -> Dict[str, Any]:
+        ...
 
-        Returns:
-            dict: A dictionary with keys as column names and their values.
-        """
+    @overload
+    def _slice(
+        self,
+        row_indices_or_names: Optional[RangeSlice],
+        column_indices_or_names: Optional[RangeSlice],
+    ) -> ItemType:
+        ...
 
-        return self[index_or_name, :]
+    @overload
+    def _slice(
+        self,
+        row_indices_or_names: Union[AtomicSlice, slice],
+        column_indices_or_names: Union[AtomicSlice, slice],
+    ) -> ItemType:
+        ...
 
     def _slice(
         self,
-        row_indices_or_names: Optional[SlicerTypes] = None,
-        column_indices_or_names: Optional[SlicerTypes] = None,
-    ) -> Union["BiocFrame", dict, list]:
+        row_indices_or_names: Optional[AllSlice] = None,
+        column_indices_or_names: Optional[AllSlice] = None,
+    ) -> ItemType:
         """Internal method to slice by index or values.
 
         Args:
@@ -370,8 +369,6 @@ def _slice(
             - If a single column is sliced, returns a :py:class:`list`.
             - For all other scenarios, returns the same type as caller with the subsetted rows and columns.
         """
-
-        new_data = OrderedDict()
         new_row_names = self.row_names
         new_column_names = self.column_names
         is_row_unary = False
@@ -379,14 +376,15 @@ def _slice(
 
         # slice the columns and data
         if column_indices_or_names is not None:
-            new_column_indices, is_col_unary = _match_to_indices(
+            new_column_indices, is_col_unary = match_to_indices(
                 self.column_names, column_indices_or_names
             )
 
-            new_column_names = _slice_or_index(new_column_names, new_column_indices)
+            new_column_names = cast(
+                List[str], slice_or_index(new_column_names, new_column_indices)
+            )
 
-        for col in new_column_names:
-            new_data[col] = self._data[col]
+        new_data = {col: self._data[col] for col in new_column_names}
 
         # slice the rows of the data
         new_number_of_rows = None
@@ -395,44 +393,55 @@ def _slice(
             if temp_row_names is None:
                 temp_row_names = list(range(self.shape[0]))
 
-            new_row_indices, is_row_unary = _match_to_indices(
+            new_row_indices, is_row_unary = match_to_indices(
                 temp_row_names, row_indices_or_names
             )
 
-            new_row_names = _slice_or_index(temp_row_names, new_row_indices)
+            new_row_names = slice_or_index(temp_row_names, new_row_indices)
             new_number_of_rows = len(new_row_names)
 
             for k, v in new_data.items():
-                if hasattr(v, "shape"):
-                    tmp = [slice(None)] * len(v.shape)
+                if isinstance(v, BiocSeq):
+                    tmp: List[SimpleSlice] = [slice(None)] * len(v.shape)
                     tmp[0] = new_row_indices
                     new_data[k] = v[(*tmp,)]
                 else:
-                    new_data[k] = _slice_or_index(v, new_row_indices)
+                    new_data[k] = slice_or_index(v, new_row_indices)
         else:
             new_number_of_rows = self.shape[0]
 
         if is_row_unary is True:
-            rdata = {}
-            for col in new_column_names:
-                rdata[col] = new_data[col][0]
-            return rdata
-        elif is_col_unary is True:
+            return {
+                col: next(
+                    iter(
+                        new_data[col].values()  # type: ignore
+                        if isinstance(new_data[col], dict)
+                        else new_data[col]
+                    )
+                )
+                for col in new_column_names
+            }
+
+        if is_col_unary is True:
             return new_data[new_column_names[0]]
 
-        current_class_const = type(self)
-        return current_class_const(
+        return type(self)(
             data=new_data,
             number_of_rows=new_number_of_rows,
             row_names=new_row_names,
             column_names=new_column_names,
         )
 
+    @overload
+    def __getitem__(self, __key: AtomicSlice) -> Dict[str, Any]:
+        ...
+
+    @overload
+    def __getitem__(self, __key: RangeSlice) -> ItemType:
+        ...
+
     # TODO: implement in-place or views
-    def __getitem__(
-        self,
-        args: SlicerArgTypes,
-    ) -> Union["BiocFrame", dict, list]:
+    def __getitem__(self, __key: AllSlice) -> ItemType:
         """Subset the data frame.
 
         This operation returns a new object with the same type as caller.
@@ -498,46 +507,84 @@ def __getitem__(
             - If a single column is sliced, returns a :py:class:`list`.
             - For all other scenarios, returns the same type as caller with the subsetted rows and columns.
         """
-
         # not an array, single str, slice by column
-        if isinstance(args, str):
-            return self._slice(None, args)
+        if isinstance(__key, str):
+            return self._slice(None, __key)
 
-        if isinstance(args, int):
-            return self._slice(args, None)
+        if isinstance(__key, bool):
+            return self._slice(__key, None)
+
+        if isinstance(__key, int):
+            return self._slice(__key, None)
 
         # not an array, a slice
-        if isinstance(args, slice):
-            return self._slice(args, None)
+        if isinstance(__key, slice):
+            return self._slice(__key, None)
 
-        if isinstance(args, list):
+        if isinstance(__key, list):
             # column names if everything is a string
-            if is_list_of_type(args, str):
-                return self._slice(None, args)
-            elif is_list_of_type(args, int):
-                return self._slice(args, None)
+            if is_list_of_type(__key, str):
+                return self._slice(None, __key)
+            elif is_list_of_type(__key, int):
+                return self._slice(__key, None)
+            elif is_list_of_type(__key, bool):
+                return self._slice(__key, None)
             else:
                 raise ValueError("`args` is not supported.")
 
         # tuple
-        if isinstance(args, tuple):
-            if len(args) == 0:
-                raise ValueError("`args` must contain at least one slice.")
-
-            if len(args) == 1:
-                return self._slice(args[0], None)
-            elif len(args) == 2:
-                return self._slice(
-                    args[0],
-                    args[1],
-                )
-            else:
-                raise ValueError("Length of `args` is more than 2.")
+        if len(__key) == 0:
+            raise ValueError("`args` must contain at least one slice.")
+
+        if len(__key) == 1:
+            return self._slice(__key[0], None)
+        elif len(__key) == 2:
+            return self._slice(__key[0], __key[1])
+        else:
+            raise ValueError("Length of `args` is more than 2.")
 
-        raise TypeError("`args` is not supported.")
+    def column(self, index_or_name: AtomicSlice) -> ItemType:
+        """Access a column by integer position or column label.
+
+        Args:
+            index_or_name (Union[str, int]): Name of the column, must be present in
+                :py:attr:`~biocframe.BiocFrame.BiocFrame.column_names`.
+
+                Alternatively, you may provide the integer index of the column to
+                access.
+
+        Raises:
+            ValueError: if ``index_or_name`` is not in column names.
+            ValueError: if integer index is greater than number of columns.
+            TypeError: if ``index_or_name`` is neither a string nor an integer.
+
+        Returns:
+            Any: Column with its original type preserved.
+        """
+        return self[:, index_or_name]
+
+    def row(self, index_or_name: AtomicSlice) -> Dict[str, Any]:
+        """Access a row by integer position or row name.
+
+        Args:
+            index_or_name (Union[str, int]): Integer index of the row to access.
+
+                Alternatively, you may provide a string specifying the row to access,
+                only if :py:attr:`~biocframe.BiocFrame.BiocFrame.row_names` are
+                available.
+
+        Raises:
+            ValueError: if ``index_or_name`` not in row names.
+            ValueError: if integer index greater than number of rows.
+            TypeError: if ``index_or_name`` is neither a string nor an integer.
+
+        Returns:
+            Any: A dictionary with keys as column names and their values.
+        """
+        return self[index_or_name, :]
 
     # TODO: implement in-place or views
-    def __setitem__(self, name: str, value: Sequence):
+    def __setitem__(self, name: str, value: ColType) -> None:
         """Add or re-assign a value to a column.
 
         Usage:
@@ -560,7 +607,7 @@ def __setitem__(self, name: str, value: Sequence):
 
         Args:
             name (str): Name of the column.
-            value (Sequence): New value to set.
+            value (List): New value to set.
 
         Raises:
             ValueError: If length of ``value`` does not match the number of rows.
@@ -575,7 +622,8 @@ def __setitem__(self, name: str, value: Sequence):
             self._column_names.append(name)
             self._number_of_columns += 1
 
-        self._data[name] = value
+        # Dunno how to fix this one...
+        self._data[name] = value  # type: ignore
 
     # TODO: implement in-place or view
     def __delitem__(self, name: str):
@@ -654,7 +702,6 @@ def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":
         Returns:
             An object with the same type as caller.
         """
-
         from pandas import Series
 
         input = inputs[0]
@@ -668,9 +715,10 @@ def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":
 
         return input
 
+    ###########################################################################
     # compatibility with Pandas
     @property
-    def columns(self) -> list:
+    def columns(self) -> List[str]:
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names`.
 
         Returns:
@@ -679,7 +727,7 @@ def columns(self) -> list:
         return self.column_names
 
     @property
-    def index(self) -> Optional[list]:
+    def index(self) -> Optional[List[str]]:
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names`.
 
         Returns:
@@ -687,9 +735,10 @@ def index(self) -> Optional[list]:
         """
         return self.row_names
 
+    ###########################################################################
     # compatibility with R interfaces
     @property
-    def rownames(self) -> Optional[list]:
+    def rownames(self) -> Optional[List[str]]:
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names`.
 
         Returns:
@@ -698,7 +747,7 @@ def rownames(self) -> Optional[list]:
         return self.row_names
 
     @rownames.setter
-    def rownames(self, names: list):
+    def rownames(self, names: List[str]):
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.row_names`.
 
         Args:
@@ -707,7 +756,7 @@ def rownames(self, names: list):
         self.row_names = names
 
     @property
-    def colnames(self) -> list:
+    def colnames(self) -> List[str]:
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names`.
 
         Returns:
@@ -716,7 +765,7 @@ def colnames(self) -> list:
         return self.column_names
 
     @colnames.setter
-    def colnames(self, names: list):
+    def colnames(self, names: List[str]):
         """Alias to :py:meth:`~biocframe.BiocFrame.BiocFrame.column_names`.
 
         Args:
diff --git a/src/biocframe/__init__.py b/src/biocframe/__init__.py
index f639c2b..08f9351 100644
--- a/src/biocframe/__init__.py
+++ b/src/biocframe/__init__.py
@@ -1,19 +1,21 @@
-import sys
+from sys import version_info
 
-if sys.version_info[:2] >= (3, 8):
-    # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8`
-    from importlib.metadata import PackageNotFoundError, version  # pragma: no cover
+if version_info[:2] >= (3, 8):
+    from importlib.metadata import (  # type: ignore
+        PackageNotFoundError,  # type: ignore
+        version,  # type: ignore
+    )
 else:
-    from importlib_metadata import PackageNotFoundError, version  # pragma: no cover
+    from importlib_metadata import (
+        PackageNotFoundError,  # type: ignore
+        version,  # type: ignore
+    )
 
 try:
-    # Change here if project is renamed and does not equal the package name
-    dist_name = "BiocFrame"
-    __version__ = version(dist_name)
+    __version__: str = version(__name__.rsplit(".", 1)[0])  # type: ignore
 except PackageNotFoundError:  # pragma: no cover
-    __version__ = "unknown"
+    __version__: str = "unknown"
 finally:
     del version, PackageNotFoundError
 
-from .BiocFrame import BiocFrame
-from .io import from_pandas
+from .BiocFrame import BiocFrame as BiocFrame
diff --git a/src/biocframe/_type_checks.py b/src/biocframe/_type_checks.py
index 84db192..f72ffe3 100644
--- a/src/biocframe/_type_checks.py
+++ b/src/biocframe/_type_checks.py
@@ -1,11 +1,14 @@
-from typing import Any, Callable
+"""Checks for types of objects."""
+
+from collections.abc import Sequence as c_Sequence
+from typing import Any, Sequence
 
 __author__ = "jkanche, keviny2"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
 
-def is_list_of_type(x: Any, target_type: Callable) -> bool:
+def is_list_of_type(x: Sequence[Any], target_type: type) -> bool:
     """Checks if ``x`` is a list, and whether all elements of the list are of the same type.
 
     Args:
@@ -15,6 +18,4 @@ def is_list_of_type(x: Any, target_type: Callable) -> bool:
     Returns:
         bool: True if ``x`` is :py:class:`list` and all elements are of the same type.
     """
-    return (isinstance(x, list) or isinstance(x, tuple)) and all(
-        isinstance(item, target_type) for item in x
-    )
+    return isinstance(x, c_Sequence) and all(isinstance(item, target_type) for item in x)
diff --git a/src/biocframe/_validators.py b/src/biocframe/_validators.py
index fec8425..b08efae 100644
--- a/src/biocframe/_validators.py
+++ b/src/biocframe/_validators.py
@@ -1,5 +1,8 @@
-from collections import OrderedDict
-from typing import Dict, List, MutableMapping, Optional, Sequence, Tuple, Union
+"""Validators for :py:class:`~biocframe.BiocFrame.BiocFrame` object."""
+
+from typing import Any, List, Optional, Tuple
+
+from .types import DataType
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
@@ -7,18 +10,18 @@
 
 
 def validate_rows(
-    data: MutableMapping[str, Union[Sequence, MutableMapping]],
-    number_of_rows: Optional[int],
-    row_names: Optional[Sequence[str]],
+    data: DataType,
+    number_of_rows: Optional[int] = None,
+    row_names: Optional[List[str]] = None,
 ) -> int:
     """Validate rows of :py:class:`~biocframe.BiocFrame.BiocFrame` object.
 
     Args:
-        data (MutableMapping[str, Union[Sequence, MutableMapping]], optional):
+        data (MutableMapping[str, Union[List, MutableMapping]], optional):
             Dictionary of columns and their values. all columns must have the
             same length. Defaults to {}.
         number_of_rows (int, optional): Number of rows.
-        row_names (Sequence[str], optional): Row index values.
+        row_names (List[str], optional): Row index values.
 
 
     Raises:
@@ -28,63 +31,64 @@ def validate_rows(
     Returns:
         int: Validated number of rows in ``data``.
     """
-    incorrect_len_keys = []
-    for k, v in data.items():
-        tmpLen = len(v)
-
-        if number_of_rows is None:
-            number_of_rows = tmpLen
-        elif number_of_rows != tmpLen:
-            incorrect_len_keys.append(k)
-
-    if len(incorrect_len_keys) > 0:
+    lengths = {k: len(v) for k, v in data.items()}
+    mean_len = sum(lengths.values()) / len(lengths.values())
+    int_mean_len = int(mean_len)
+
+    if int_mean_len != mean_len or (
+        number_of_rows is not None and int_mean_len != number_of_rows
+    ):
+        number_of_rows = (
+            int_mean_len if number_of_rows is None else number_of_rows
+        )
+        bad_rows = [k for k, v in lengths.items() if v != number_of_rows]
         raise ValueError(
             "`BiocFrame` expects all columns in ``data`` to be of equal"
-            f"length, these columns do not: {', '.join(incorrect_len_keys)}."
+            f"length, but these are not: {bad_rows}."
         )
+    else:
+        number_of_rows = int_mean_len
 
     if row_names is not None:
         if not validate_unique_list(row_names):
             raise ValueError("`row_names` must be unique!")
 
-        if number_of_rows is None:
-            number_of_rows = len(row_names)
-        else:
-            if len(row_names) != number_of_rows:
-                raise ValueError(
-                    "Length of `row_names` and `number_of_rows` do not match, "
-                    f"l{len(row_names)} != {number_of_rows}"
-                )
+        if len(row_names) != number_of_rows:
+            raise ValueError(
+                "Length of `row_names` and `number_of_rows` do not match, "
+                f"{len(row_names)} != {number_of_rows}"
+            )
 
     return number_of_rows
 
 
 def validate_cols(
-    column_names: Sequence[str],
-    data: MutableMapping[str, Union[Sequence, MutableMapping]],
-) -> Tuple[List[str], Dict[str, Union[Sequence, MutableMapping]]]:
+    column_names: Optional[List[str]] = None,
+    data: DataType = {},
+) -> Tuple[List[str], DataType]:
     """Validate columns of a :py:class:`biocframe.BiocFrame` object.
 
     Args:
-        column_names (Sequence[str], optional): Column names, if not provided,
+        column_names (List[str], optional): Column names, if not provided,
             its automatically inferred from data. Defaults to None.
-        data (MutableMapping[str, Union[Sequence, MutableMapping]], optional):
+        data (MutableMapping[str, Union[List, MutableMapping]], optional):
             a dictionary of columns and their values. all columns must have the
-            same length. Defaults to {}. Defaults to {}.
+            same length. Defaults to {}.
 
     Raises:
         ValueError: When ``column_names`` do not match the keys from ``data``.
         TypeError: Incorrect column type.
 
     Returns:
-        Sequence[str]: List of columns names.
+        List[str]: List of columns names.
     """
     if column_names is None:
         column_names = list(data.keys())
     else:
         if len(column_names) != len(data.keys()):
             raise ValueError(
-                "Number of columns mismatch between `column_names` and `data`."
+                "Mismatch in number of columns between 'column_names' and "
+                "'data'."
             )
 
         if len(set(column_names).difference(data.keys())) > 0:
@@ -97,19 +101,13 @@ def validate_cols(
                 "Not all columns from `data` are present in `column_names`."
             )
 
-    # Technically should throw an error but
-    # lets just fix it
-    # column names and dict order should be the same
-    incorrect_types = []
-    new_odata = OrderedDict()
-    for k in column_names:
-        # check for types
-        col_value = data[k]
-
-        if not (hasattr(col_value, "__len__") and hasattr(col_value, "__getitem__")):
-            incorrect_types.append(k)
-
-        new_odata[k] = data[k]
+    # Every column value must be sized and indexable (``__len__`` and
+    # ``__getitem__``); collect the names of any columns that are not.
+    incorrect_types: List[str] = [
+        k
+        for k, v in data.items()
+        if not (hasattr(v, "__len__") and hasattr(v, "__getitem__"))
+    ]
 
     if len(incorrect_types) > 0:
         raise TypeError(
@@ -118,16 +116,14 @@ def validate_cols(
             f"{', '.join(incorrect_types)}."
         )
 
-    data = new_odata
-
     return column_names, data
 
 
-def validate_unique_list(values: Sequence) -> bool:
+def validate_unique_list(values: List[Any]) -> bool:
     """Validate if ``values`` contains unique values.
 
     Args:
-        values (Sequence): List to check.
+        values (List): List to check.
 
     Returns:
         bool: `True` if all values are unique else False.
diff --git a/src/biocframe/io/from_pandas.py b/src/biocframe/io/from_pandas.py
index 644d429..665c956 100644
--- a/src/biocframe/io/from_pandas.py
+++ b/src/biocframe/io/from_pandas.py
@@ -1,3 +1,5 @@
+from pandas import DataFrame
+
 from ..BiocFrame import BiocFrame
 
 __author__ = "jkanche"
@@ -17,7 +19,6 @@ def from_pandas(input: "DataFrame") -> BiocFrame:
     Returns:
         BiocFrame: A :py:class:`~biocframe.BiocFrame.BiocFrame` object.
     """
-
     from pandas import DataFrame
 
     if not isinstance(input, DataFrame):
@@ -29,4 +30,6 @@ def from_pandas(input: "DataFrame") -> BiocFrame:
     if input.index is not None:
         rindex = input.index.to_list()
 
-    return BiocFrame(data=rdata, row_names=rindex, column_names=input.columns.to_list())
+    return BiocFrame(
+        data=rdata, row_names=rindex, column_names=input.columns.to_list()
+    )
diff --git a/src/biocframe/types.py b/src/biocframe/types.py
index 5686a50..44889cf 100644
--- a/src/biocframe/types.py
+++ b/src/biocframe/types.py
@@ -1,8 +1,43 @@
-from typing import Optional, Sequence, Tuple, Union
+"""Custom types for biocframe."""
+
+from typing import Any, Dict, List, Protocol, Tuple, Union, runtime_checkable
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
-SlicerTypes = Union[Sequence[int], Sequence[bool], Sequence[str], slice, int, str]
-SlicerArgTypes = Union[Sequence[str], Tuple[SlicerTypes, Optional[SlicerTypes]]]
+SimpleSlice = Union[slice, List[int]]
+AtomicSlice = Union[int, bool, str]
+RangeSlice = Union[
+    List[AtomicSlice],
+    slice,
+    Tuple[Union[AtomicSlice, slice], Union[AtomicSlice, slice, None]],
+]
+AllSlice = Union[RangeSlice, AtomicSlice]
+
+
+@runtime_checkable
+class BiocSeq(Protocol):
+    """The protocol for data types."""
+
+    @property
+    def shape(self) -> List[int]:
+        """Return the shape of the data."""
+        ...
+
+    def __getitem__(self, __key: Any) -> Any:
+        """Slice the data."""
+        ...
+
+    def __len__(self) -> int:
+        """Return the length of the data."""
+        ...
+
+
+ColType = Union[Dict[str, Any], List[Any], BiocSeq]
+DataType = Union[
+    Dict[str, ColType],
+    Dict[str, Dict[str, Any]],
+    Dict[str, List[Any]],
+    Dict[str, BiocSeq],
+]
diff --git a/src/biocframe/utils.py b/src/biocframe/utils.py
index 7c578da..a49e14d 100644
--- a/src/biocframe/utils.py
+++ b/src/biocframe/utils.py
@@ -1,29 +1,31 @@
-from typing import Any, List, Sequence, Tuple, Union
-from warnings import warn
+"""Utility functions for biocframe."""
 
+from typing import Any, List, Tuple, cast
+from warnings import warn
 
 from ._type_checks import is_list_of_type
-from .types import SlicerTypes
+from .types import AllSlice, SimpleSlice
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
 
-def _match_to_indices(
-    data: Sequence, query: SlicerTypes
-) -> Tuple[Union[slice, List[int]], bool]:
+def match_to_indices(
+    data: List[Any], query: AllSlice
+) -> Tuple[SimpleSlice, bool]:
     """Utility function to make slicer arguments more palatable.
 
     Args:
-        data (Sequence): Input data array to slice.
-        query (SlicerTypes): Either a slice or
-            a list of indices to keep.
+        data (List): Input data array to slice.
+        query (AllSlice): Either a slice or a list of int, bool or str indices to keep.
 
     Returns:
-        Tuple[Union[slice, List[int]], bool]: Resolved list of indices and if its a unary slice.
+        SimpleSlice:
+            Resolved list of indices.
+        bool:
+            `True` if its a unary slice.
     """
-
     resolved_indices = None
     is_unary = False
 
@@ -38,16 +40,18 @@ def _match_to_indices(
     elif isinstance(query, slice):
         # resolved_indices = list(range(len(data))[query])
         resolved_indices = query
-    elif isinstance(query, list) or isinstance(query, tuple):
+    else:
         if is_list_of_type(query, bool):
             if len(query) != len(data):
                 warn(
                     "`indices` is a boolean vector, length should match the size of the data."
                 )
 
-            resolved_indices = [i for i in range(len(query)) if query[i] is True]
+            resolved_indices = [
+                i for i in range(len(query)) if query[i] is True
+            ]
         elif is_list_of_type(query, int):
-            resolved_indices = query
+            resolved_indices = cast(List[int], query)
         elif is_list_of_type(query, str):
             diff = list(set(query).difference(set(data)))
             if len(diff) > 0:
@@ -58,17 +62,27 @@ def _match_to_indices(
             resolved_indices = [data.index(i) for i in query]
         else:
             raise TypeError("`indices` is a list of unsupported types!")
-    else:
-        raise TypeError("`indices` is unsupported!")
 
     return resolved_indices, is_unary
 
 
-def _slice_or_index(data: Any, query: Union[slice, List[int]]):
-    sliced_data = None
+def slice_or_index(data: Any, query: SimpleSlice) -> List[Any]:
+    """Utility function to slice or index data.
+
+    Args:
+        data (Any): Input data array to slice.
+        query (SimpleSlice): Either a `slice` or a list of int indices to keep.
+
+    Returns:
+        List[Any]: The sliced data.
+
+    Raises:
+        TypeError: If the query is not a slice or a list.
+    """
+    sliced_data: List[Any]
     if isinstance(query, slice):
         sliced_data = data[query]
-    elif isinstance(query, list):
+    else:
         if not isinstance(data, list):
             try:
                 return data[query]
@@ -76,7 +90,5 @@ def _slice_or_index(data: Any, query: Union[slice, List[int]]):
                 pass
 
         sliced_data = [data[i] for i in query]
-    else:
-        raise TypeError("Cannot match column indices to a known operation!")
 
     return sliced_data
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 907308e..29db24d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,4 +1,4 @@
-from biocframe.utils import _match_to_indices
+from biocframe.utils import match_to_indices
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
@@ -8,12 +8,12 @@
 def test_match_to_indices():
     obj = ["b", "n", "m"]
 
-    sliced_ind, is_unary = _match_to_indices(obj, query=[0, 2])
+    sliced_ind, is_unary = match_to_indices(obj, query=[0, 2])
     assert sliced_ind is not None
     assert len(sliced_ind) == 2
     assert sliced_ind == [0, 2]
 
-    sliced_ind, is_unary = _match_to_indices(obj, query=["b", "n"])
+    sliced_ind, is_unary = match_to_indices(obj, query=["b", "n"])
     assert sliced_ind is not None
     assert sliced_ind == [0, 1]
     assert len(sliced_ind) == 2

From cc2d38bb0adbfcfaa6c91835d7f54d5b39de11ee Mon Sep 17 00:00:00 2001
From: Max Hargreaves <hargreaves.max@gene.com>
Date: Wed, 20 Sep 2023 10:51:18 -0700
Subject: [PATCH 2/6] Change: finished changes

---
 src/biocframe/BiocFrame.py | 102 +++++++++++++++++--------------------
 src/biocframe/types.py     |  34 +++++++++----
 2 files changed, 72 insertions(+), 64 deletions(-)

diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 79ffa9f..a5609dc 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -19,10 +19,12 @@
 from .types import (
     AllSlice,
     AtomicSlice,
-    BiocSeq,
+    BiocCol,
+    ColSlice,
     ColType,
     DataType,
     RangeSlice,
+    RowSlice,
     SimpleSlice,
 )
 from .utils import match_to_indices, slice_or_index
@@ -173,40 +175,39 @@ def __repr__(self) -> str:
         table = PrettyTable(padding_width=1)
         table.field_names = [str(col) for col in self._column_names]
 
-        _rows = []
-        rows_to_show = 2
-        _top = self.shape[0]
-        if _top > rows_to_show:
-            _top = rows_to_show
+        num_rows = self.shape[0]
+        # maximum number of top and bottom rows to show
+        max_shown_rows = 3
 
-        # top three rows
-        for r in range(_top):
-            _row = self.row(r)
-            vals = list(_row.values())
-            res = [str(v) for v in vals]
-            _rows.append(res)
+        max_top_row = max_shown_rows if num_rows > max_shown_rows else num_rows
 
-        if self.shape[0] > 2 * rows_to_show:
-            # add ...
-            _rows.append(["..." for _ in range(len(self._column_names))])
+        min_last_row = num_rows - max_shown_rows
+        if min_last_row <= 0:
+            min_last_row = None
+        elif min_last_row < max_top_row:
+            min_last_row = max_top_row
 
-        _last = self.shape[0] - _top
-        if _last <= rows_to_show:
-            _last = self.shape[0] - _top
+        rows: List[List[str]] = []
 
-        # last three rows
-        for r in range(_last + 1, len(self)):
-            _row = self.row(r)
-            vals = list(_row.values())
-            res = [str(v) for v in vals]
-            _rows.append(res)
+        # up to top three rows
+        for r in range(max_top_row):
+            rows.append([str(val) for val in self.row(r).values()])
 
-        table.add_rows(_rows)
+        if min_last_row is not None:
+            if num_rows > (max_shown_rows * 2):
+                # add ... to the middle row
+                rows.append(["..." for _ in range(len(self._column_names))])
+
+            # up to last three rows
+            for r in range(min_last_row, num_rows):
+                rows.append([str(val) for val in self.row(r).values()])
+
+        table.add_rows(rows)  # type: ignore
 
         pattern = (
-            f"BiocFrame with {self.dims[0]} rows & {self.dims[1]} columns \n"
-            f"contains row names?: {self.row_names is not None} \n"
-            f"{table.get_string()}"
+            f"BiocFrame with {num_rows} rows & {self.dims[1]} columns \n"
+            f"with row names: {self.row_names is not None} \n"
+            f"{table.get_string()}"  # type: ignore
         )
 
         return pattern
@@ -326,24 +327,16 @@ def has_column(self, name: str) -> bool:
     @overload
     def _slice(
         self,
-        row_indices_or_names: Optional[AtomicSlice],
-        column_indices_or_names: Optional[AtomicSlice],
+        row_indices_or_names: AtomicSlice,
+        column_indices_or_names: None,
     ) -> Dict[str, Any]:
         ...
 
     @overload
     def _slice(
         self,
-        row_indices_or_names: Optional[RangeSlice],
-        column_indices_or_names: Optional[RangeSlice],
-    ) -> ItemType:
-        ...
-
-    @overload
-    def _slice(
-        self,
-        row_indices_or_names: Union[AtomicSlice, slice],
-        column_indices_or_names: Union[AtomicSlice, slice],
+        row_indices_or_names: Optional[AllSlice],
+        column_indices_or_names: Union[AllSlice, None],
     ) -> ItemType:
         ...
 
@@ -401,7 +394,7 @@ def _slice(
             new_number_of_rows = len(new_row_names)
 
             for k, v in new_data.items():
-                if isinstance(v, BiocSeq):
+                if isinstance(v, BiocCol):
                     tmp: List[SimpleSlice] = [slice(None)] * len(v.shape)
                     tmp[0] = new_row_indices
                     new_data[k] = v[(*tmp,)]
@@ -433,11 +426,13 @@ def _slice(
         )
 
     @overload
-    def __getitem__(self, __key: AtomicSlice) -> Dict[str, Any]:
+    def __getitem__(self, __key: Union[RangeSlice, ColSlice]) -> ItemType:
         ...
 
     @overload
-    def __getitem__(self, __key: RangeSlice) -> ItemType:
+    def __getitem__(
+        self, __key: Union[AtomicSlice, RowSlice]
+    ) -> Dict[str, Any]:
         ...
 
     # TODO: implement in-place or views
@@ -525,24 +520,21 @@ def __getitem__(self, __key: AllSlice) -> ItemType:
             # column names if everything is a string
             if is_list_of_type(__key, str):
                 return self._slice(None, __key)
-            elif is_list_of_type(__key, int):
+
+            if is_list_of_type(__key, int):
                 return self._slice(__key, None)
-            elif is_list_of_type(__key, bool):
+
+            if is_list_of_type(__key, bool):
                 return self._slice(__key, None)
-            else:
-                raise ValueError("`args` is not supported.")
 
-        # tuple
-        if len(__key) == 0:
-            raise ValueError("`args` must contain at least one slice.")
+            raise ValueError("`args` is not supported.")
 
-        if len(__key) == 1:
-            return self._slice(__key[0], None)
-        elif len(__key) == 2:
-            return self._slice(__key[0], __key[1])
-        else:
+        # tuple of two elements
+        if len(__key) != 2:
             raise ValueError("Length of `args` is more than 2.")
 
+        return self._slice(__key[0], __key[1])
+
     def column(self, index_or_name: AtomicSlice) -> ItemType:
         """Access a column by integer position or column label.
 
diff --git a/src/biocframe/types.py b/src/biocframe/types.py
index 44889cf..ebf5619 100644
--- a/src/biocframe/types.py
+++ b/src/biocframe/types.py
@@ -1,23 +1,39 @@
 """Custom types for biocframe."""
 
-from typing import Any, Dict, List, Protocol, Tuple, Union, runtime_checkable
+from typing import (
+    Any,
+    Dict,
+    List,
+    Protocol,
+    Sequence,
+    Tuple,
+    Union,
+    runtime_checkable,
+)
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
-SimpleSlice = Union[slice, List[int]]
-AtomicSlice = Union[int, bool, str]
+SimpleSlice = Union[slice, Sequence[int]]
+
+AtomicSlice = Union[int, str]
+ListSlice = List[Union[AtomicSlice, bool]]
 RangeSlice = Union[
-    List[AtomicSlice],
+    ListSlice,
     slice,
-    Tuple[Union[AtomicSlice, slice], Union[AtomicSlice, slice, None]],
+    Tuple[
+        Union[ListSlice, slice],
+        Union[ListSlice, slice, None],
+    ],
 ]
-AllSlice = Union[RangeSlice, AtomicSlice]
+RowSlice = Tuple[AtomicSlice, "AllSlice"]
+ColSlice = Tuple["AllSlice", AtomicSlice]
+AllSlice = Union[RangeSlice, AtomicSlice, RowSlice, ColSlice]
 
 
 @runtime_checkable
-class BiocSeq(Protocol):
+class BiocCol(Protocol):
     """The protocol for data types."""
 
     @property
@@ -34,10 +50,10 @@ def __len__(self) -> int:
         ...
 
 
-ColType = Union[Dict[str, Any], List[Any], BiocSeq]
+ColType = Union[Dict[str, Any], List[Any], BiocCol]
 DataType = Union[
     Dict[str, ColType],
     Dict[str, Dict[str, Any]],
     Dict[str, List[Any]],
-    Dict[str, BiocSeq],
+    Dict[str, BiocCol],
 ]

From c3b3c14dbb827f81f8d654a9d86da13fee65dc17 Mon Sep 17 00:00:00 2001
From: Max Hargreaves <hargreaves.max@gene.com>
Date: Wed, 20 Sep 2023 13:11:43 -0700
Subject: [PATCH 3/6] Change: final-ish

---
 setup.cfg                       |  2 +-
 src/biocframe/BiocFrame.py      | 69 ++++++++++++++-------------------
 src/biocframe/__init__.py       |  1 +
 src/biocframe/_validators.py    | 14 +++++--
 src/biocframe/io/from_pandas.py | 30 +++++++-------
 src/biocframe/types.py          | 28 +++++++------
 src/biocframe/utils.py          | 25 +++++++++++-
 tests/test_initialize.py        | 14 +++++--
 tests/test_methods.py           | 19 +++++++--
 tests/test_readme.py            | 14 +++++--
 tests/test_utils.py             |  4 +-
 11 files changed, 129 insertions(+), 91 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index b8fa774..ef9a41b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,7 +50,6 @@ python_requires = >=3.8
 install_requires =
     importlib-metadata; python_version<"3.8"
     prettytable
-    pandas
 
 [options.packages.find]
 where = src
@@ -69,6 +68,7 @@ testing =
     pytest
     pytest-cov
     numpy
+    pandas
 
 [options.entry_points]
 # Add here console scripts like:
diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index a5609dc..7bea937 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -23,14 +23,15 @@
     ColSlice,
     ColType,
     DataType,
-    RangeSlice,
     RowSlice,
+    SeqSlice,
     SimpleSlice,
+    TupleSlice,
 )
 from .utils import match_to_indices, slice_or_index
 
 try:
-    from pandas import DataFrame, RangeIndex
+    from pandas import DataFrame
 except Exception:
     pass
 
@@ -149,27 +150,6 @@ def __init__(
         self._number_of_columns = len(self._column_names)
         self._metadata = metadata
 
-    @classmethod
-    def from_pandas(cls, df: "DataFrame") -> "BiocFrame":
-        """Read a :py:class:`~biocframe.BiocFrame.BiocFrame` from :py:class:`~pandas.DataFrame` object.
-
-        Args:
-            df (:py:class:`~pandas.DataFrame`): Input data.
-
-        Raises:
-            TypeError: If ``input`` is not a :py:class:`~pandas.DataFrame`.
-
-        Returns:
-            BiocFrame: A :py:class:`~biocframe.BiocFrame.BiocFrame` object.
-        """
-        r_data: Dict[str, List[Any]] = df.to_dict("list")  # type: ignore
-        r_index: Optional[List[str]] = None
-
-        if df.index is not RangeIndex:  # type: ignore
-            r_index = df.index.to_list()  # type: ignore
-
-        return BiocFrame(data=r_data, row_names=r_index)
-
     def __repr__(self) -> str:
         """Get a machine-readable string representation of the object."""
         table = PrettyTable(padding_width=1)
@@ -426,7 +406,7 @@ def _slice(
         )
 
     @overload
-    def __getitem__(self, __key: Union[RangeSlice, ColSlice]) -> ItemType:
+    def __getitem__(self, __key: Union[SeqSlice, slice, ColSlice]) -> ItemType:
         ...
 
     @overload
@@ -435,6 +415,10 @@ def __getitem__(
     ) -> Dict[str, Any]:
         ...
 
+    @overload
+    def __getitem__(self, __key: TupleSlice) -> "BiocFrame":
+        ...
+
     # TODO: implement in-place or views
     def __getitem__(self, __key: AllSlice) -> ItemType:
         """Subset the data frame.
@@ -576,7 +560,7 @@ def row(self, index_or_name: AtomicSlice) -> Dict[str, Any]:
         return self[index_or_name, :]
 
     # TODO: implement in-place or views
-    def __setitem__(self, name: str, value: ColType) -> None:
+    def __setitem__(self, __key: str, __value: ColType) -> None:
         """Add or re-assign a value to a column.
 
         Usage:
@@ -604,18 +588,18 @@ def __setitem__(self, name: str, value: ColType) -> None:
         Raises:
             ValueError: If length of ``value`` does not match the number of rows.
         """
-        if len(value) != self.shape[0]:
+        if len(__value) != self.shape[0]:
             raise ValueError(
                 "Length of `value`, does not match the number of the rows,"
-                f"need to be {self.shape[0]} but provided {len(value)}."
+                f"need to be {self.shape[0]} but provided {len(__value)}."
             )
 
-        if name not in self.column_names:
-            self._column_names.append(name)
+        if __key not in self.column_names:
+            self._column_names.append(__key)
             self._number_of_columns += 1
 
         # Dunno how to fix this one...
-        self._data[name] = value  # type: ignore
+        self._data[__key] = __value  # type: ignore
 
     # TODO: implement in-place or view
     def __delitem__(self, name: str):
@@ -648,7 +632,11 @@ def __delitem__(self, name: str):
         if name not in self.column_names:
             raise ValueError(f"Column: '{name}' does not exist.")
 
-        del self._data[name]
+        try:
+            del self._data[name]  # type: ignore
+        except Exception:
+            self._data = {k: v for k, v in self._data.items() if k != name}
+
         self._column_names.remove(name)
         self._number_of_columns -= 1
 
@@ -670,15 +658,16 @@ def to_pandas(self) -> "DataFrame":
         Returns:
             DataFrame: a :py:class:`~pandas.DataFrame` object.
         """
-        from pandas import DataFrame
-
         return DataFrame(
             data=self._data, index=self._row_names, columns=self._column_names
         )
 
     # TODO: very primitive implementation, needs very robust testing
     # TODO: implement in-place, view
-    def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":
+
+    def __array_ufunc__(
+        self, ufunc: Any, method: str, *inputs: Any, **kwargs: Any
+    ) -> "BiocFrame":
         """Interface with NumPy array methods.
 
         Usage:
@@ -696,16 +685,16 @@ def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":
         """
         from pandas import Series
 
-        input = inputs[0]
-        if not isinstance(input, BiocFrame):
+        _input = inputs[0]
+        if not isinstance(_input, BiocFrame):
             raise TypeError("Input is not a `BiocFrame` object.")
 
         for col in self.column_names:
-            if is_numeric_dtype(Series(input.column(col))):
-                new_col = getattr(func, method)(input.column(col), **kwargs)
-                input[col] = new_col
+            if is_numeric_dtype(Series(_input.column(col))):  # type: ignore
+                new_col = getattr(ufunc, method)(_input.column(col), **kwargs)
+                _input[col] = new_col
 
-        return input
+        return _input
 
     ###########################################################################
     # compatibility with Pandas
diff --git a/src/biocframe/__init__.py b/src/biocframe/__init__.py
index 08f9351..4d7b5f2 100644
--- a/src/biocframe/__init__.py
+++ b/src/biocframe/__init__.py
@@ -19,3 +19,4 @@
     del version, PackageNotFoundError
 
 from .BiocFrame import BiocFrame as BiocFrame
+from .io import from_pandas as from_pandas
diff --git a/src/biocframe/_validators.py b/src/biocframe/_validators.py
index b08efae..98e5cd5 100644
--- a/src/biocframe/_validators.py
+++ b/src/biocframe/_validators.py
@@ -32,16 +32,22 @@ def validate_rows(
         int: Validated number of rows in ``data``.
     """
     lengths = {k: len(v) for k, v in data.items()}
-    mean_len = sum(lengths.values()) / len(lengths.values())
+    mean_len = (
+        sum(lengths.values()) / len(lengths.values())
+        if len(lengths) > 0
+        else 0
+    )
     int_mean_len = int(mean_len)
 
-    if int_mean_len != mean_len or (
+    if not any(lengths.values()):
+        number_of_rows = number_of_rows if number_of_rows is not None else 0
+    elif len(set(lengths.values())) > 1 or (
         number_of_rows is not None and int_mean_len != number_of_rows
     ):
-        number_of_rows = (
+        expected_num_rows = (
             int_mean_len if number_of_rows is None else number_of_rows
         )
-        bad_rows = [k for k, v in lengths.items() if v != number_of_rows]
+        bad_rows = [k for k, v in lengths.items() if v != expected_num_rows]
         raise ValueError(
             "`BiocFrame` expects all columns in ``data`` to be of equal"
             f"length, but these are not: {bad_rows}."
diff --git a/src/biocframe/io/from_pandas.py b/src/biocframe/io/from_pandas.py
index 665c956..2b13640 100644
--- a/src/biocframe/io/from_pandas.py
+++ b/src/biocframe/io/from_pandas.py
@@ -1,17 +1,24 @@
-from pandas import DataFrame
+"""A function for converting from pandas.DataFrame to BiocFrame."""
+
+from typing import Any, Dict, List, Optional
 
 from ..BiocFrame import BiocFrame
 
+try:
+    from pandas import DataFrame, RangeIndex
+except Exception:
+    pass
+
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
 
-def from_pandas(input: "DataFrame") -> BiocFrame:
+def from_pandas(df: "DataFrame") -> "BiocFrame":
     """Read a :py:class:`~biocframe.BiocFrame.BiocFrame` from :py:class:`~pandas.DataFrame` object.
 
     Args:
-        input (:py:class:`~pandas.DataFrame`): Input data.
+        df (:py:class:`~pandas.DataFrame`): Input data.
 
     Raises:
         TypeError: If ``input`` is not a :py:class:`~pandas.DataFrame`.
@@ -19,17 +26,10 @@ def from_pandas(input: "DataFrame") -> BiocFrame:
     Returns:
         BiocFrame: A :py:class:`~biocframe.BiocFrame.BiocFrame` object.
     """
-    from pandas import DataFrame
-
-    if not isinstance(input, DataFrame):
-        raise TypeError("data is not a pandas `DataFrame` object.")
-
-    rdata = input.to_dict("list")
-    rindex = None
+    r_data: Dict[str, List[Any]] = df.to_dict("list")  # type: ignore
+    r_index: Optional[List[str]] = None
 
-    if input.index is not None:
-        rindex = input.index.to_list()
+    if not isinstance(df.index, RangeIndex):  # type: ignore
+        r_index = df.index.to_list()  # type: ignore
 
-    return BiocFrame(
-        data=rdata, row_names=rindex, column_names=input.columns.to_list()
-    )
+    return BiocFrame(data=r_data, row_names=r_index)
diff --git a/src/biocframe/types.py b/src/biocframe/types.py
index ebf5619..11e4ef6 100644
--- a/src/biocframe/types.py
+++ b/src/biocframe/types.py
@@ -2,8 +2,8 @@
 
 from typing import (
     Any,
-    Dict,
     List,
+    Mapping,
     Protocol,
     Sequence,
     Tuple,
@@ -18,15 +18,12 @@
 SimpleSlice = Union[slice, Sequence[int]]
 
 AtomicSlice = Union[int, str]
-ListSlice = List[Union[AtomicSlice, bool]]
-RangeSlice = Union[
-    ListSlice,
-    slice,
-    Tuple[
-        Union[ListSlice, slice],
-        Union[ListSlice, slice, None],
-    ],
+SeqSlice = Sequence[Union[AtomicSlice, bool]]
+TupleSlice = Tuple[
+    Union[SeqSlice, slice],
+    Union[SeqSlice, slice, None],
 ]
+RangeSlice = Union[SeqSlice, slice, TupleSlice]
 RowSlice = Tuple[AtomicSlice, "AllSlice"]
 ColSlice = Tuple["AllSlice", AtomicSlice]
 AllSlice = Union[RangeSlice, AtomicSlice, RowSlice, ColSlice]
@@ -37,7 +34,7 @@ class BiocCol(Protocol):
     """The protocol for data types."""
 
     @property
-    def shape(self) -> List[int]:
+    def shape(self) -> Sequence[int]:
         """Return the shape of the data."""
         ...
 
@@ -50,10 +47,11 @@ def __len__(self) -> int:
         ...
 
 
-ColType = Union[Dict[str, Any], List[Any], BiocCol]
+# Mapping is necessary as it is covariant which MutableMapping, etc. are not.
+ColType = Union[Mapping[str, Any], List[Any], BiocCol]
 DataType = Union[
-    Dict[str, ColType],
-    Dict[str, Dict[str, Any]],
-    Dict[str, List[Any]],
-    Dict[str, BiocCol],
+    Mapping[str, ColType],
+    Mapping[str, Mapping[str, Any]],
+    Mapping[str, List[Any]],
+    Mapping[str, BiocCol],
 ]
diff --git a/src/biocframe/utils.py b/src/biocframe/utils.py
index a49e14d..1d20123 100644
--- a/src/biocframe/utils.py
+++ b/src/biocframe/utils.py
@@ -1,16 +1,37 @@
 """Utility functions for biocframe."""
 
-from typing import Any, List, Tuple, cast
+from typing import Any, List, Sequence, Tuple, Union, cast, overload
 from warnings import warn
 
 from ._type_checks import is_list_of_type
-from .types import AllSlice, SimpleSlice
+from .types import (
+    AllSlice,
+    AtomicSlice,
+    ColSlice,
+    RowSlice,
+    SeqSlice,
+    SimpleSlice,
+    TupleSlice,
+)
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
 
+@overload
+def match_to_indices(data: List[Any], query: slice) -> Tuple[slice, bool]:
+    ...
+
+
+@overload
+def match_to_indices(
+    data: List[Any],
+    query: Union[SeqSlice, TupleSlice, AtomicSlice, RowSlice, ColSlice],
+) -> Tuple[Sequence[int], bool]:
+    ...
+
+
 def match_to_indices(
     data: List[Any], query: AllSlice
 ) -> Tuple[SimpleSlice, bool]:
diff --git a/tests/test_initialize.py b/tests/test_initialize.py
index 3c9c5d4..264d721 100644
--- a/tests/test_initialize.py
+++ b/tests/test_initialize.py
@@ -2,7 +2,7 @@
 import pytest
 
 import biocframe
-from biocframe.BiocFrame import BiocFrame
+from biocframe import BiocFrame, from_pandas
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
@@ -42,7 +42,10 @@ def test_initialize_pandas():
                 {
                     "ncol1": [4, 5, 6],
                     "ncol2": ["a", "b", "c"],
-                    "deep": {"dcol1": ["j", "k", "l"], "dcol2": ["a", "s", "l"]},
+                    "deep": {
+                        "dcol1": ["j", "k", "l"],
+                        "dcol2": ["a", "s", "l"],
+                    },
                 },
                 {
                     "ncol2": ["a"],
@@ -57,7 +60,7 @@ def test_initialize_pandas():
         }
     )
 
-    bframe = biocframe.from_pandas(df_gr)
+    bframe = from_pandas(df_gr)
     assert bframe is not None
 
 
@@ -80,7 +83,10 @@ def test_should_fail():
                     {
                         "ncol1": [4, 5, 6],
                         "ncol2": ["a", "b", "c"],
-                        "deep": {"dcol1": ["j", "k", "l"], "dcol2": ["a", "s", "l"]},
+                        "deep": {
+                            "dcol1": ["j", "k", "l"],
+                            "dcol2": ["a", "s", "l"],
+                        },
                     },
                     {
                         "ncol2": ["a"],
diff --git a/tests/test_methods.py b/tests/test_methods.py
index 1447bab..da74481 100644
--- a/tests/test_methods.py
+++ b/tests/test_methods.py
@@ -200,7 +200,10 @@ def test_bframe_slice():
 
     assert slice is not None
     assert len(slice.column_names) == 2
-    assert len(list(set(slice.column_names).difference(["column1", "nested"]))) == 0
+    assert (
+        len(list(set(slice.column_names).difference(["column1", "nested"])))
+        == 0
+    )
 
     assert len(slice.dims) == 2
     assert slice.dims == (2, 2)
@@ -210,7 +213,12 @@ def test_bframe_slice():
     assert sliced_list is not None
     assert len(sliced_list.column_names) == 2
     assert (
-        len(list(set(sliced_list.column_names).difference(["column1", "nested"]))) == 0
+        len(
+            list(
+                set(sliced_list.column_names).difference(["column1", "nested"])
+            )
+        )
+        == 0
     )
 
     assert len(sliced_list.dims) == 2
@@ -336,7 +344,10 @@ def test_nested_biocFrame_slice():
 
     assert slice is not None
     assert len(slice.column_names) == 2
-    assert len(list(set(slice.column_names).difference(["column1", "nested"]))) == 0
+    assert (
+        len(list(set(slice.column_names).difference(["column1", "nested"])))
+        == 0
+    )
 
     assert len(slice.dims) == 2
     assert slice.dims == (2, 2)
@@ -372,7 +383,7 @@ def test_bframe_iter():
     assert bframe is not None
 
     iterCount = 0
-    for k, v in bframe:
+    for _ in bframe:
         iterCount += 1
 
     assert iterCount == bframe.dims[0]
diff --git a/tests/test_readme.py b/tests/test_readme.py
index 067db10..002d8de 100644
--- a/tests/test_readme.py
+++ b/tests/test_readme.py
@@ -22,10 +22,10 @@ def test_bframe():
             "chr3",
         ]
         * 20,
-        "starts": range(100, 300),
-        "ends": range(110, 310),
+        "starts": list(range(100, 300)),
+        "ends": list(range(110, 310)),
         "strand": ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"] * 20,
-        "score": range(0, 200),
+        "score": list(range(0, 200)),
         "GC": [random() for _ in range(10)] * 20,
     }
 
@@ -55,6 +55,12 @@ def test_bframe():
     assert sliced_df is not None
     assert sliced_df.dims == (4, 3)
     assert (
-        len(list(set(sliced_df.column_names).difference(["end", "strands", "scores"])))
+        len(
+            list(
+                set(sliced_df.column_names).difference(
+                    ["end", "strands", "scores"]
+                )
+            )
+        )
         == 0
     )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 29db24d..88e43ff 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -8,12 +8,12 @@
 def test_match_to_indices():
     obj = ["b", "n", "m"]
 
-    sliced_ind, is_unary = match_to_indices(obj, query=[0, 2])
+    sliced_ind, _ = match_to_indices(obj, query=[0, 2])
     assert sliced_ind is not None
     assert len(sliced_ind) == 2
     assert sliced_ind == [0, 2]
 
-    sliced_ind, is_unary = match_to_indices(obj, query=["b", "n"])
+    sliced_ind, _ = match_to_indices(obj, query=["b", "n"])
     assert sliced_ind is not None
     assert sliced_ind == [0, 1]
     assert len(sliced_ind) == 2

From 9e6d4e47c7f7ab6bad249dae068f767cc9ebb190 Mon Sep 17 00:00:00 2001
From: Max Hargreaves <hargreaves.max@gene.com>
Date: Wed, 20 Sep 2023 13:29:51 -0700
Subject: [PATCH 4/6] Fix: metadata

---
 src/biocframe/BiocFrame.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 79e1640..9619848 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -153,7 +153,7 @@ def __init__(
             column_names, self._data
         )
         self._number_of_columns = len(self._column_names)
-        self._metadata = metadata
+        self._metadata = {} if metadata is None else metadata
 
     def __repr__(self) -> str:
         """Get a machine-readable string representation of the object."""
@@ -284,7 +284,7 @@ def column_names(self, names: List[str]) -> None:
         }
 
     @property
-    def metadata(self) -> Optional[Dict[str, Any]]:
+    def metadata(self) -> Dict[str, Any]:
         """Get/set the metadata.
 
         Args:
@@ -296,7 +296,7 @@ def metadata(self) -> Optional[Dict[str, Any]]:
         return self._metadata
 
     @metadata.setter
-    def metadata(self, metadata: Optional[Dict[str, Any]]):
+    def metadata(self, metadata: Dict[str, Any]):
         self._metadata = metadata
 
     def has_column(self, name: str) -> bool:

From f7140991ab36454d3d61814c02eca3942762fde4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Sep 2023 20:59:56 +0000
Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 setup.py                     |  2 +-
 src/biocframe/BiocFrame.py   | 12 +++---------
 src/biocframe/_validators.py | 13 +++----------
 src/biocframe/utils.py       |  8 ++------
 tests/test_methods.py        | 17 +++--------------
 tests/test_readme.py         |  8 +-------
 6 files changed, 13 insertions(+), 47 deletions(-)

diff --git a/setup.py b/setup.py
index 79c4d8d..f627d76 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 if __name__ == "__main__":
     try:
         setup(use_scm_version={"version_scheme": "no-guess-dev"})
-    except:  # noqa
+    except:
         print(
             "\n\nAn error occurred while building the project, "
             "please ensure you have the most updated version of setuptools, "
diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 9619848..4fab415 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -149,9 +149,7 @@ def __init__(
         self._number_of_rows = validate_rows(
             self._data, number_of_rows, self._row_names
         )
-        self._column_names, self._data = validate_cols(
-            column_names, self._data
-        )
+        self._column_names, self._data = validate_cols(column_names, self._data)
         self._number_of_columns = len(self._column_names)
         self._metadata = {} if metadata is None else metadata
 
@@ -279,9 +277,7 @@ def column_names(self, names: List[str]) -> None:
             raise ValueError("Column names must be unique!")
 
         self._column_names = names
-        self._data = {
-            names[i]: v for i, (_, v) in enumerate(self.data.items())
-        }
+        self._data = {names[i]: v for i, (_, v) in enumerate(self.data.items())}
 
     @property
     def metadata(self) -> Dict[str, Any]:
@@ -414,9 +410,7 @@ def __getitem__(self, __key: Union[SeqSlice, slice, ColSlice]) -> ItemType:
         ...
 
     @overload
-    def __getitem__(
-        self, __key: Union[AtomicSlice, RowSlice]
-    ) -> Dict[str, Any]:
+    def __getitem__(self, __key: Union[AtomicSlice, RowSlice]) -> Dict[str, Any]:
         ...
 
     @overload
diff --git a/src/biocframe/_validators.py b/src/biocframe/_validators.py
index 98e5cd5..9c25c6a 100644
--- a/src/biocframe/_validators.py
+++ b/src/biocframe/_validators.py
@@ -32,11 +32,7 @@ def validate_rows(
         int: Validated number of rows in ``data``.
     """
     lengths = {k: len(v) for k, v in data.items()}
-    mean_len = (
-        sum(lengths.values()) / len(lengths.values())
-        if len(lengths) > 0
-        else 0
-    )
+    mean_len = sum(lengths.values()) / len(lengths.values()) if len(lengths) > 0 else 0
     int_mean_len = int(mean_len)
 
     if int_mean_len == 0:
@@ -44,9 +40,7 @@ def validate_rows(
     elif int_mean_len != mean_len or (
         number_of_rows is not None and int_mean_len != number_of_rows
     ):
-        expected_num_rows = (
-            int_mean_len if number_of_rows is None else number_of_rows
-        )
+        expected_num_rows = int_mean_len if number_of_rows is None else number_of_rows
         bad_rows = [k for k, v in lengths.items() if v != expected_num_rows]
         raise ValueError(
             "`BiocFrame` expects all columns in ``data`` to be of equal"
@@ -93,8 +87,7 @@ def validate_cols(
     else:
         if len(column_names) != len(data.keys()):
             raise ValueError(
-                "Mismatch in number of columns between 'column_names' and "
-                "'data`'."
+                "Mismatch in number of columns between 'column_names' and " "'data`'."
             )
 
         if len(set(column_names).difference(data.keys())) > 0:
diff --git a/src/biocframe/utils.py b/src/biocframe/utils.py
index 1d20123..cca16c7 100644
--- a/src/biocframe/utils.py
+++ b/src/biocframe/utils.py
@@ -32,9 +32,7 @@ def match_to_indices(
     ...
 
 
-def match_to_indices(
-    data: List[Any], query: AllSlice
-) -> Tuple[SimpleSlice, bool]:
+def match_to_indices(data: List[Any], query: AllSlice) -> Tuple[SimpleSlice, bool]:
     """Utility function to make slicer arguments more palatable.
 
     Args:
@@ -68,9 +66,7 @@ def match_to_indices(
                     "`indices` is a boolean vector, length should match the size of the data."
                 )
 
-            resolved_indices = [
-                i for i in range(len(query)) if query[i] is True
-            ]
+            resolved_indices = [i for i in range(len(query)) if query[i] is True]
         elif is_list_of_type(query, int):
             resolved_indices = cast(List[int], query)
         elif is_list_of_type(query, str):
diff --git a/tests/test_methods.py b/tests/test_methods.py
index e516dcd..2951b7b 100644
--- a/tests/test_methods.py
+++ b/tests/test_methods.py
@@ -200,10 +200,7 @@ def test_bframe_slice():
 
     assert slice is not None
     assert len(slice.column_names) == 2
-    assert (
-        len(list(set(slice.column_names).difference(["column1", "nested"])))
-        == 0
-    )
+    assert len(list(set(slice.column_names).difference(["column1", "nested"]))) == 0
 
     assert len(slice.dims) == 2
     assert slice.dims == (2, 2)
@@ -213,12 +210,7 @@ def test_bframe_slice():
     assert sliced_list is not None
     assert len(sliced_list.column_names) == 2
     assert (
-        len(
-            list(
-                set(sliced_list.column_names).difference(["column1", "nested"])
-            )
-        )
-        == 0
+        len(list(set(sliced_list.column_names).difference(["column1", "nested"]))) == 0
     )
 
     assert len(sliced_list.dims) == 2
@@ -344,10 +336,7 @@ def test_nested_biocFrame_slice():
 
     assert slice is not None
     assert len(slice.column_names) == 2
-    assert (
-        len(list(set(slice.column_names).difference(["column1", "nested"])))
-        == 0
-    )
+    assert len(list(set(slice.column_names).difference(["column1", "nested"]))) == 0
 
     assert len(slice.dims) == 2
     assert slice.dims == (2, 2)
diff --git a/tests/test_readme.py b/tests/test_readme.py
index 002d8de..a915fb4 100644
--- a/tests/test_readme.py
+++ b/tests/test_readme.py
@@ -55,12 +55,6 @@ def test_bframe():
     assert sliced_df is not None
     assert sliced_df.dims == (4, 3)
     assert (
-        len(
-            list(
-                set(sliced_df.column_names).difference(
-                    ["end", "strands", "scores"]
-                )
-            )
-        )
+        len(list(set(sliced_df.column_names).difference(["end", "strands", "scores"])))
         == 0
     )

From 491a8663f0c6d9de7e6860419ec11570b54cb4ed Mon Sep 17 00:00:00 2001
From: Max Hargreaves <hargreaves.max@gene.com>
Date: Thu, 21 Sep 2023 11:15:50 -0700
Subject: [PATCH 6/6] Changes: need for genomicranges

---
 src/biocframe/BiocFrame.py | 18 ++++++++++--------
 src/biocframe/py.typed     |  0
 src/biocframe/types.py     |  8 ++++++--
 3 files changed, 16 insertions(+), 10 deletions(-)
 create mode 100644 src/biocframe/py.typed

diff --git a/src/biocframe/BiocFrame.py b/src/biocframe/BiocFrame.py
index 9619848..54391a3 100644
--- a/src/biocframe/BiocFrame.py
+++ b/src/biocframe/BiocFrame.py
@@ -146,6 +146,7 @@ def __init__(
         """
         self._data: DataType = {} if data is None else data
         self._row_names = row_names
+        self._metadata = {} if metadata is None else metadata
         self._number_of_rows = validate_rows(
             self._data, number_of_rows, self._row_names
         )
@@ -153,10 +154,9 @@ def __init__(
             column_names, self._data
         )
         self._number_of_columns = len(self._column_names)
-        self._metadata = {} if metadata is None else metadata
 
-    def __repr__(self) -> str:
-        """Get a machine-readable string representation of the object."""
+    def _repr_table(self) -> str:
+        """Make the pretty table for the __repr__ method."""
         table = PrettyTable(padding_width=1)
         table.field_names = [str(col) for col in self._column_names]
 
@@ -189,14 +189,16 @@ def __repr__(self) -> str:
 
         table.add_rows(rows)  # type: ignore
 
-        pattern = (
-            f"BiocFrame with {num_rows} rows & {self.dims[1]} columns \n"
+        return table.get_string()
+
+    def __repr__(self) -> str:
+        """Get a machine-readable string representation of the object."""
+        return (
+            f"BiocFrame with {self.shape[0]} rows & {self.dims[1]} columns \n"
             f"with row names: {self.row_names is not None} \n"
-            f"{table.get_string()}"  # type: ignore
+            f"{self._repr_table()}"  # type: ignore
         )
 
-        return pattern
-
     @property
     def shape(self) -> Tuple[int, int]:
         """Get shape of the data frame.
diff --git a/src/biocframe/py.typed b/src/biocframe/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/src/biocframe/types.py b/src/biocframe/types.py
index 11e4ef6..5fe99ce 100644
--- a/src/biocframe/types.py
+++ b/src/biocframe/types.py
@@ -46,12 +46,16 @@ def __len__(self) -> int:
         """Return the length of the data."""
         ...
 
+    def __iter__(self) -> Any:
+        """Iterate over the data."""
+        ...
+
 
 # Mapping is necessary as it is covariant which MutableMapping, etc. are not.
-ColType = Union[Mapping[str, Any], List[Any], BiocCol]
+ColType = Union[Mapping[str, Any], Sequence[Any], BiocCol]
 DataType = Union[
     Mapping[str, ColType],
     Mapping[str, Mapping[str, Any]],
-    Mapping[str, List[Any]],
+    Mapping[str, Sequence[Any]],
     Mapping[str, BiocCol],
 ]