From 6d612ad94a214252b2662ff76544b2f01c308897 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 15 Oct 2024 09:09:39 -0700 Subject: [PATCH] Fix coercion to pandas when `IRanges` contains mcols (#46) Add tests and update changelog. --- .pre-commit-config.yaml | 14 +++++++------- CHANGELOG.md | 3 ++- src/iranges/IRanges.py | 2 +- tests/test_IRanges_pandas.py | 11 +++++++++++ 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c9601c..eed031a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,13 +17,13 @@ repos: - id: mixed-line-ending args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows -- repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 - hooks: - - id: docformatter - additional_dependencies: [tomli] - args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120] - # --config, ./pyproject.toml +# - repo: https://github.com/PyCQA/docformatter +# rev: v1.7.5 +# hooks: +# - id: docformatter +# additional_dependencies: [tomli] +# args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120] +# # --config, ./pyproject.toml - repo: https://github.com/psf/black rev: 24.8.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 95a7c1e..24482ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # Changelog -## Version 0.2.10 +## Version 0.2.10 - 0.2.12 - Added a numpy vectorized version of finding gaps (tldr: not fast compared to the traditional version). May be needs a better implementation - Added NCLS based intersection operation (based on what pyranges does in their internals) - Added tests for intersection operations. +- Fixed an issue when coercing `IRanges` containing mcols. 
## Version 0.2.8 - 0.2.9 diff --git a/src/iranges/IRanges.py b/src/iranges/IRanges.py index 17d2f85..ec84414 100644 --- a/src/iranges/IRanges.py +++ b/src/iranges/IRanges.py @@ -2220,7 +2220,7 @@ def to_pandas(self) -> "pandas.DataFrame": output = pd.DataFrame({"starts": _starts, "widths": _widths, "ends": _ends}) if self._mcols is not None and self._mcols.shape[1] > 0: - output = pd.concat([output, self._mcols.to_pandas()]) + output = pd.concat([output, self._mcols.to_pandas()], axis=1) if self._names is not None: output.index = self._names diff --git a/tests/test_IRanges_pandas.py b/tests/test_IRanges_pandas.py index 24f33fe..95b257c 100644 --- a/tests/test_IRanges_pandas.py +++ b/tests/test_IRanges_pandas.py @@ -24,3 +24,14 @@ def test_pandas_export(): assert y is not None assert isinstance(y, pd.DataFrame) assert set(y.columns.tolist()).issubset(["starts", "ends", "widths"]) + + +def test_pandas_with_mcols(): + x = IRanges( + [1, 2, 3, 4], [4, 5, 6, 7], mcols=BiocFrame({"temp": ["a", "t", "g", "c"]}) + ) + + y = x.to_pandas() + assert y is not None + assert isinstance(y, pd.DataFrame) + assert set(y.columns.tolist()).issubset(["starts", "ends", "widths", "temp"])