diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml new file mode 100644 index 0000000..1280782 --- /dev/null +++ b/.github/workflows/build-docs.yml @@ -0,0 +1,43 @@ +name: Build documentation + +on: + push: + tags: + - "*" + +jobs: + test: + name: Build docs + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + cache: 'pip' + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip setuptools + DEPENDENCIES=$(python -c 'from setuptools.config.setupcfg import read_configuration as c; a = c("setup.cfg"); print(" ".join(a["options"]["install_requires"][1:]))') + pip install ${DEPENDENCIES} + DOCDEPENDENCIES=$(python -c 'with open("docs/requirements.txt") as a: available = list(a); print(" ".join(map(lambda x : x.strip(), filter(lambda x : not x.startswith("#"), available))))') + pip install ${DOCDEPENDENCIES} + + - name: Build docs + run: | + touch src/dolomite_base/lib_dolomite_base.py + sphinx-build --color -b html -d docs/doctrees docs docs/_build/html + touch ./docs/_build/html/.nojekyll + + - name: GH Pages Deployment + uses: JamesIves/github-pages-deploy-action@4.1.3 + with: + branch: gh-pages # The branch the action should deploy to. + folder: ./docs/_build/html + clean: true # Automatically remove deleted files from the deploy branch diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..bdbb89c --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,146 @@ +name: Publish to PyPI + +on: + push: + tags: + - "*" + +jobs: + build_linux_x86_64: + name: Build wheels for linux x86_64 + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.2 + env: + CIBW_ARCHS: x86_64 + CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" + CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/artifactdb/prebuilt-hdf5/manylinux_x86_64:0.0.4 + CIBW_MUSLLINUX_X86_64_IMAGE: ghcr.io/artifactdb/prebuilt-hdf5/musllinux_x86_64:0.0.4 + CIBW_SKIP: pp* + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + build_macosx_x86_64: + name: Build wheels for macosx x86_64 + runs-on: macos-11 + steps: + - name: Check out repository + uses: actions/checkout@v3 + + - name: Grab prebuilt dependencies + run: | + curl -L https://github.com/ArtifactDB/prebuilt-hdf5/releases/download/0.0.4/macosx_x86_64.tar.gz > bundle.tar.gz + tar -xvf bundle.tar.gz + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.2 + env: + CIBW_ARCHS: x86_64 + CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" + CIBW_ENVIRONMENT: "MORE_CMAKE_OPTIONS=\"-DCMAKE_INSTALL_PREFIX=$(pwd)/installed -DCMAKE_OSX_ARCHITECTURES=x86_64\"" + CIBW_BUILD_VERBOSITY: 3 + CIBW_SKIP: pp* + MACOSX_DEPLOYMENT_TARGET: 11.7 + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + build_macosx_arm64: + name: Build wheels for macosx arm64 + runs-on: macos-13 + steps: + - name: Check out repository + uses: actions/checkout@v3 + + - name: Grab prebuilt dependencies + run: | + curl -L https://github.com/ArtifactDB/prebuilt-hdf5/releases/download/0.0.4-manual/macosx_arm64.tar.gz > bundle.tar.gz + tar -xvf bundle.tar.gz + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.2 + env: + CIBW_ARCHS: arm64 + CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" + CIBW_ENVIRONMENT: "MORE_CMAKE_OPTIONS=\"-DCMAKE_INSTALL_PREFIX=$(pwd)/installed -DCMAKE_OSX_ARCHITECTURES=arm64\"" + CIBW_BUILD_VERBOSITY: 3 + MACOSX_DEPLOYMENT_TARGET: 13.0 + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + +# build_windows_x86_64: +# name: Build wheels for windows x86_64 +# runs-on: windows-2019 +# steps: +# - name: Check out repository +# uses: actions/checkout@v3 +# +# - name: Grab prebuilt dependencies +# run: | +# curl -L https://github.com/ArtifactDB/prebuilt-hdf5/releases/download/0.0.4/windows_x86_64.tar.gz > bundle.tar.gz +# tar -xvf bundle.tar.gz +# shell: bash +# +# - name: Store path +# run: | +# $wd = pwd +# echo "INSTALL_DIR=$wd\\installed" >> $env:GITHUB_ENV +# shell: powershell +# +# - name: Build wheels +# uses: pypa/cibuildwheel@v2.16.2 +# env: +# CIBW_ARCHS: AMD64 +# CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" +# CIBW_ENVIRONMENT: "MORE_CMAKE_OPTIONS=\"-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}\" VERBOSE=1" +# CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel" +# CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}" +# CIBW_TEST_EXTRAS: "testing" +# CIBW_TEST_COMMAND: "pytest {package}/tests" +# CIBW_BUILD_VERBOSITY: 3 + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Build sdist + run: pipx run build --sdist + + - uses: actions/upload-artifact@v3 + with: + path: dist/*.tar.gz + + upload_pypi: + needs: [build_linux_x86_64, build_macosx_x86_64, build_macosx_arm64, build_sdist] + runs-on: ubuntu-latest + # upload to PyPI on every tag starting with 'v' + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + steps: + - uses: actions/download-artifact@v3 + with: + # unpacks default artifact into dist/ + # if `name: artifact` is omitted, the action will create extra parent dir + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.8.3 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml deleted file mode 100644 index a91bd99..0000000 --- a/.github/workflows/pypi-test.yml +++ /dev/null @@ -1,148 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Test the library - -on: - push: - branches: - - master - tags: - - "*" - pull_request: - -jobs: - test: - name: Running tests - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - - name: "Install system dependencies" - run: | - sudo apt-get install zlib1g-dev - - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - cache: 'pip' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools - DEPENDENCIES=$(python -c 'from setuptools.config.setupcfg import read_configuration as c; a = c("setup.cfg"); print(" ".join(a["options"]["install_requires"][1:] + a["options"]["extras_require"]["testing"][1:]))') - pip install ${DEPENDENCIES} - - - name: Download rds2cpp deps - run: | - cd extern/rds2cpp - cmake . - cd ../.. - - # We do proper tests if we're on the master branch, or if we're creating a new release. - - name: Test with tox - if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/') - run: | - pip install tox - tox - - - name: Build docs - if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/') - run: | - tox -e docs - touch ./docs/_build/html/.nojekyll - - # Otherwise we do some cached builds and tests for faster merging of PRs. - - name: Set up ccache - if: github.ref != 'refs/heads/master' && !startsWith(github.ref, 'refs/tags') - uses: hendrikmuhs/ccache-action@v1.2 - - - name: Quickly build and test - if: github.ref != 'refs/heads/master' && !startsWith(github.ref, 'refs/tags') - run: | - CC="ccache gcc" python setup.py install - pytest - - - name: GH Pages Deployment - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - uses: JamesIves/github-pages-deploy-action@4.1.3 - with: - branch: gh-pages # The branch the action should deploy to. - folder: ./docs/_build/html - clean: true # Automatically remove deleted files from the deploy branch - - build_wheels: - name: Build wheels on ${{ matrix.os }} - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-22.04, macos-13] # at some point get this to work on windows-2019 - - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - - name: Download rds2cpp deps - run: | - cd extern/rds2cpp - cmake . - cd ../.. - - - name: Build wheels - uses: pypa/cibuildwheel@v2.12.1 - env: - CIBW_ARCHS_MACOS: x86_64 arm64 - CIBW_ARCHS_LINUX: x86_64 # remove this later so we build for all linux archs - CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" - CIBW_SKIP: pp* # remove this later, but for some reason fails to generate pypy wheels - - - uses: actions/upload-artifact@v3 - with: - path: ./wheelhouse/*.whl - - build_sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - - name: Download rds2cpp deps - run: | - cd extern/rds2cpp - cmake . - cd ../.. - - - name: Build sdist - run: pipx run build --sdist - - - uses: actions/upload-artifact@v3 - with: - path: dist/*.tar.gz - - upload_pypi: - needs: [test, build_wheels, build_sdist] - runs-on: ubuntu-latest - # upload to PyPI on every tag starting with 'v' - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - # alternatively, to publish when a GitHub Release is created, use the following rule: - # if: github.event_name == 'release' && github.event.action == 'published' - steps: - - uses: actions/download-artifact@v3 - with: - # unpacks default artifact into dist/ - # if `name: artifact` is omitted, the action will create extra parent dir - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@v1.8.3 - with: - user: __token__ - password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..5c8d6b1 --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,51 @@ +name: Run unit tests + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + + name: Python ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip setuptools + DEPENDENCIES=$(python -c 'from setuptools.config.setupcfg import read_configuration as c; a = c("setup.cfg"); print(" ".join(a["options"]["install_requires"][1:] + a["options"]["extras_require"]["testing"][1:]))') + pip install ${DEPENDENCIES} pybind11 + + # We do proper tests if we're on the master branch, or if we're creating a new release. + - name: Test with tox + if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/') + run: | + pip install tox + tox + + # Otherwise we do some cached builds and tests for faster merging of PRs. + - name: Set up ccache + if: github.ref != 'refs/heads/master' && !startsWith(github.ref, 'refs/tags') + uses: hendrikmuhs/ccache-action@v1.2 + + - name: Quickly build and test + if: github.ref != 'refs/heads/master' && !startsWith(github.ref, 'refs/tags') + run: | + CC="ccache gcc" python setup.py install + pytest diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index e2a53d4..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "extern/rds2cpp"] - path = extern/rds2cpp - url = https://github.com/LTLA/rds2cpp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c9601c..96cab27 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows - repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 + rev: "master" hooks: - id: docformatter additional_dependencies: [tomli] diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 55bba17..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -recursive-include src/rds2py/lib/ * -recursive-include extern * diff --git a/extern/rds2cpp b/extern/rds2cpp deleted file mode 160000 index c27d493..0000000 --- a/extern/rds2cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c27d4936a42802ce095df88afe5843621354e250 diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 0000000..c38c3b8 --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.24) + +project(rds2py + VERSION 1.0.0 + DESCRIPTION "Building the rds shared library" + LANGUAGES CXX) + +# Importing all of the dependencies with pinned versions (even for transitive dependencies). +include(FetchContent) + +FetchContent_Declare( + rds2cpp + GIT_REPOSITORY https://github.com/LTLA/rds2cpp + GIT_TAG v1.1.0 +) + +FetchContent_MakeAvailable(rds2cpp) + +# Defining the targets. +set(TARGET rds2py) + +find_package(pybind11 CONFIG) + +# pybind11 method: +pybind11_add_module(${TARGET} + src/rdswrapper.cpp +) + +set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17) + +target_link_libraries(${TARGET} PRIVATE rds2cpp pybind11::pybind11) + +set_target_properties(${TARGET} PROPERTIES + OUTPUT_NAME rds_parser + PREFIX "" +) diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp new file mode 100644 index 0000000..2c9dcfa --- /dev/null +++ b/lib/src/rdswrapper.cpp @@ -0,0 +1,186 @@ +#include +#include +#include +#include +#include +#include + +namespace py = pybind11; + +class RdsReader { +private: + const rds2cpp::RObject* ptr; + +public: + RdsReader(const rds2cpp::RObject* p) : ptr(p) { + if (!p) throw std::runtime_error("Null pointer passed to 'RdsReader'."); + } + + std::string get_rtype() const { + if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'."); + // py::print("arg::", static_cast(ptr->type())); + switch (ptr->type()) { + case rds2cpp::SEXPType::INT: return "integer"; + case rds2cpp::SEXPType::REAL: return "double"; + case rds2cpp::SEXPType::STR: return "string"; + case rds2cpp::SEXPType::LGL: return "boolean"; + case rds2cpp::SEXPType::VEC: return "vector"; + case rds2cpp::SEXPType::S4: return "S4"; + case rds2cpp::SEXPType::NIL: return "null"; + default: return "other"; + } + } + + int get_rsize() const { + if (!ptr) throw std::runtime_error("Null pointer in 'get_rsize'."); + switch (ptr->type()) { + case rds2cpp::SEXPType::INT: return static_cast(ptr)->data.size(); + case rds2cpp::SEXPType::REAL: return static_cast(ptr)->data.size(); + case rds2cpp::SEXPType::STR: return static_cast(ptr)->data.size(); + case rds2cpp::SEXPType::LGL: return static_cast(ptr)->data.size(); + case rds2cpp::SEXPType::VEC: return static_cast(ptr)->data.size(); + default: return -1; + } + } + + py::array get_numeric_data() const { + if (!ptr) throw std::runtime_error("Null pointer in 'get_numeric_data'."); + switch (ptr->type()) { + case rds2cpp::SEXPType::INT: { + const auto& data = static_cast(ptr)->data; + return py::array_t({data.size()}, {sizeof(int32_t)}, data.data()); + } + case rds2cpp::SEXPType::LGL: { + const auto& data = static_cast(ptr)->data; + return py::array_t({data.size()}, {sizeof(int32_t)}, data.data()); + } + case rds2cpp::SEXPType::REAL: { + const auto& data = static_cast(ptr)->data; + return py::array_t({data.size()}, {sizeof(double)}, data.data()); + } + default: + throw std::runtime_error("Invalid type for numeric data"); + } + } + + py::list get_string_arr() const { + if (!ptr) throw std::runtime_error("Null pointer in 'get_string_arr'."); + if (ptr->type() != rds2cpp::SEXPType::STR) { + throw std::runtime_error("Invalid type for 'string_arr'"); + } + const auto& data = static_cast(ptr)->data; + return py::cast(data); + } + + py::list get_attribute_names() const { + if (!ptr) throw std::runtime_error("Null pointer in 'get_attribute_names'"); + return py::cast(get_attributes().names); + } + + py::object load_attribute_by_name(const std::string& name) const { + if (!ptr) throw std::runtime_error("Null pointer in 'load_attribute_by_name'"); + const auto& attributes = get_attributes(); + auto it = std::find(attributes.names.begin(), attributes.names.end(), name); + if (it == attributes.names.end()) { + throw std::runtime_error("Attribute not found: " + name); + } + size_t index = std::distance(attributes.names.begin(), it); + return py::cast(new RdsReader(attributes.values[index].get())); + } + + py::object load_vec_element(int index) const { + if (!ptr) throw std::runtime_error("Null pointer in 'load_vec_element'"); + if (ptr->type() != rds2cpp::SEXPType::VEC) { + throw std::runtime_error("Not a vector type"); + } + const auto& data = static_cast(ptr)->data; + if (index < 0 || static_cast(index) >= data.size()) { + throw std::out_of_range("Vector index out of range"); + } + return py::cast(new RdsReader(data[index].get())); + } + + std::string get_package_name() const { + if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) { + throw std::runtime_error("Not an S4 object"); + } + return static_cast(ptr)->package_name; + } + + std::string get_class_name() const { + if (!ptr || ptr->type() != rds2cpp::SEXPType::S4) { + throw std::runtime_error("Not an S4 object"); + } + return static_cast(ptr)->class_name; + } + + std::pair get_dimensions() const { + if (!ptr || ptr->type() != rds2cpp::SEXPType::INT) { + throw std::runtime_error("Dimensions must be integer"); + } + const auto& dims = static_cast(ptr)->data; + if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) { + throw std::runtime_error("Invalid dimensions"); + } + return {static_cast(dims[0]), static_cast(dims[1])}; + } + +private: + const rds2cpp::Attributes& get_attributes() const { + if (!ptr) throw std::runtime_error("Null pointer in get_attributes"); + switch (ptr->type()) { + case rds2cpp::SEXPType::INT: return static_cast(ptr)->attributes; + case rds2cpp::SEXPType::REAL: return static_cast(ptr)->attributes; + case rds2cpp::SEXPType::LGL: return static_cast(ptr)->attributes; + case rds2cpp::SEXPType::STR: return static_cast(ptr)->attributes; + case rds2cpp::SEXPType::VEC: return static_cast(ptr)->attributes; + case rds2cpp::SEXPType::S4: return static_cast(ptr)->attributes; + default: throw std::runtime_error("Unsupported type for attributes"); + } + } +}; + +class RdsObject { +private: + std::unique_ptr parsed; + std::unique_ptr reader; + +public: + RdsObject(const std::string& file) { + try { + parsed = std::make_unique(rds2cpp::parse_rds(file)); + if (!parsed || !parsed->object) { + throw std::runtime_error("Failed to parse RDS file"); + } + reader = std::make_unique(parsed->object.get()); + } catch (const std::exception& e) { + throw std::runtime_error(std::string("Error in 'RdsObject' constructor: ") + e.what()); + } + } + + RdsReader* get_robject() const { + if (!reader) throw std::runtime_error("Null reader in 'get_robject'"); + return reader.get(); + } +}; + +PYBIND11_MODULE(rds_parser, m) { + py::register_exception(m, "RdsParserError"); + + py::class_(m, "RdsObject") + .def(py::init()) + .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal); + + py::class_(m, "RdsReader") + .def(py::init()) + .def("get_rtype", &RdsReader::get_rtype) + .def("get_rsize", &RdsReader::get_rsize) + .def("get_numeric_data", &RdsReader::get_numeric_data) + .def("get_string_arr", &RdsReader::get_string_arr) + .def("get_attribute_names", &RdsReader::get_attribute_names) + .def("load_attribute_by_name", &RdsReader::load_attribute_by_name) + .def("load_vec_element", &RdsReader::load_vec_element) + .def("get_package_name", &RdsReader::get_package_name) + .def("get_class_name", &RdsReader::get_class_name) + .def("get_dimensions", &RdsReader::get_dimensions); +} diff --git a/pyproject.toml b/pyproject.toml index e917d21..b2ee221 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] # AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD! -requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "wheel", "Cython", "numpy"] +requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "cmake", "pybind11", "numpy"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] diff --git a/setup.cfg b/setup.cfg index 62610c8..623f78c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,6 @@ python_requires = >=3.8 # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - Cython numpy pandas scipy diff --git a/setup.py b/setup.py index 4765bd8..5a221d5 100644 --- a/setup.py +++ b/setup.py @@ -5,34 +5,75 @@ Learn more under: https://pyscaffold.org/ """ -import numpy -from Cython.Build import cythonize -from setuptools import setup -from setuptools.extension import Extension +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext as build_ext_orig +import pathlib +import os +import shutil +import sys +import pybind11 + + +### Adapted from dolomite_base: https://github.com/ArtifactDB/dolomite-base/blob/master/setup.py +## Adapted from https://stackoverflow.com/questions/42585210/extending-setuptools-extension-to-use-cmake-in-setup-py. +class CMakeExtension(Extension): + def __init__(self, name): + super().__init__(name, sources=[]) + + +class build_ext(build_ext_orig): + def run(self): + for ext in self.extensions: + self.build_cmake(ext) + + def build_cmake(self, ext): + build_temp = pathlib.Path(self.build_temp) + build_lib = pathlib.Path(self.build_lib) + outpath = os.path.join(build_lib.absolute(), ext.name) + + if not os.path.exists(build_temp): + cmd = [ + "cmake", + "-S", + "lib", + "-B", + build_temp, + "-Dpybind11_DIR=" + + os.path.join( + os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11" + ), + "-DPYTHON_EXECUTABLE=" + sys.executable, + ] + if os.name != "nt": + cmd.append("-DCMAKE_BUILD_TYPE=Release") + cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath) + + if "MORE_CMAKE_OPTIONS" in os.environ: + cmd += os.environ["MORE_CMAKE_OPTIONS"].split() + self.spawn(cmd) + + if not self.dry_run: + cmd = ["cmake", "--build", build_temp] + if os.name == "nt": + cmd += ["--config", "Release"] + self.spawn(cmd) + if os.name == "nt": + # Gave up trying to get MSVC to respect the output directory. + # Delvewheel also needs it to have a 'pyd' suffix... whatever. + shutil.copyfile( + os.path.join(build_temp, "Release", "_core.dll"), + os.path.join(outpath, "_core.pyd"), + ) + if __name__ == "__main__": + import os + try: setup( use_scm_version={"version_scheme": "no-guess-dev"}, - ext_modules=cythonize( - [ - Extension( - "rds2py.core", - ["src/rds2py/lib/rds_parser.cpp", "src/rds2py/lib/parser.pyx"], - include_dirs=[ - "extern/rds2cpp/include", - "extern/rds2cpp/_deps/byteme-src/include", - numpy.get_include(), - ], - language="c++", - extra_compile_args=[ - "-std=c++17", - ], - extra_link_args=["-lz"], - ) - ], - compiler_directives={"language_level": "3"}, - ), + ext_modules=[CMakeExtension("rds2py")], + cmdclass={"build_ext": build_ext}, ) except: # noqa print( diff --git a/src/rds2py/PyRdsReader.py b/src/rds2py/PyRdsReader.py new file mode 100644 index 0000000..5219d31 --- /dev/null +++ b/src/rds2py/PyRdsReader.py @@ -0,0 +1,93 @@ +from .rds_parser import RdsObject, RdsReader +import numpy as np +from typing import Dict, Any, List, Union +from warnings import warn + + +class PyRdsParserError(Exception): + pass + + +class PyRdsParser: + """Python bindings to the rds2cpp interface.""" + + def __init__(self, file_path: str): + try: + self.rds_object = RdsObject(file_path) + robject = self.rds_object.get_robject() + if not isinstance(robject, RdsReader): + raise TypeError(f"Expected 'RdsReader' object, got {type(robject)}") + self.root_object = robject + except Exception as e: + raise PyRdsParserError(f"Error initializing 'PyRdsParser': {str(e)}") + + def parse(self) -> Dict[str, Any]: + """Parse the RDS File (recursively). + + Returns: + A Dictionary with object attributes as keys and the value representing the data from the RDS file. + """ + try: + return self._process_object(self.root_object) + except Exception as e: + raise PyRdsParserError(f"Error parsing RDS object: {str(e)}") + + def _process_object(self, obj: RdsReader) -> Dict[str, Any]: + try: + rtype = obj.get_rtype() + result: Dict[str, Any] = {"type": rtype} + + if rtype in ["integer", "boolean", "double"]: + result["data"] = self._get_numeric_data(obj, rtype) + result["attributes"] = self._process_attributes(obj) + elif rtype == "string": + result["data"] = obj.get_string_arr() + elif rtype == "vector": + result["data"] = self._process_vector(obj) + result["attributes"] = self._process_attributes(obj) + elif rtype == "S4": + result["package_name"] = obj.get_package_name() + result["class_name"] = obj.get_class_name() + result["attributes"] = self._process_attributes(obj) + elif rtype == "null": + pass + else: + # raise ValueError + warn(f"Unsupported R object type: {rtype}") + result["data"] = None + result["attributes"] = None + + return result + except Exception as e: + raise PyRdsParserError(f"Error processing object: {str(e)}") + + def _get_numeric_data(self, obj: RdsReader, rtype: str) -> np.ndarray: + try: + data = obj.get_numeric_data() + if rtype == "boolean": + return data.astype(bool) + return data + except Exception as e: + raise PyRdsParserError(f"Error getting numeric data: {str(e)}") + + def _process_vector(self, obj: RdsReader) -> List[Dict[str, Any]]: + return [ + self._process_object(obj.load_vec_element(i)) + for i in range(obj.get_rsize()) + ] + + def _process_attributes(self, obj: RdsReader) -> Dict[str, Dict[str, Any]]: + try: + attributes = {} + for name in obj.get_attribute_names(): + attr_obj = obj.load_attribute_by_name(name) + attributes[name] = self._process_object(attr_obj) + return attributes + except Exception as e: + raise PyRdsParserError(f"Error processing attributes: {str(e)}") + + def get_dimensions(self) -> Union[tuple, None]: + try: + return self.root_object.get_dimensions() + except Exception as e: + raise PyRdsParserError(f"Error getting dimensions: {str(e)}") diff --git a/src/rds2py/lib/parser.pxd b/src/rds2py/lib/parser.pxd deleted file mode 100644 index 7ac6cba..0000000 --- a/src/rds2py/lib/parser.pxd +++ /dev/null @@ -1,29 +0,0 @@ -# pretty basic Cython wrapper - -from libcpp.string cimport string as string_c -from libc.stdint cimport uintptr_t -from libcpp.vector cimport vector -from libcpp.utility cimport pair - -cdef extern from "rds_parser.cpp": - uintptr_t py_parser_rds_file(string_c file) nogil except + - uintptr_t py_parser_extract_robject(int ptr) nogil except + - - void py_read_parsed_ptr(uintptr_t ptr) nogil except + - - string_c py_robject_extract_type(uintptr_t ptr) nogil except + - int py_robject_extract_size(uintptr_t ptr) nogil except + - - uintptr_t parse_robject_int_vector(uintptr_t ptr) nogil except + - vector[string_c] parse_robject_string_vector(uintptr_t ptr) nogil except + - vector[string_c] parse_robject_attribute_names(uintptr_t ptr) nogil except + - - int parse_robject_find_attribute(uintptr_t ptr, string_c name) nogil except + - uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr, int i) nogil except + - uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr, string_c name) nogil except + - uintptr_t parse_robject_load_vec_element(uintptr_t ptr, int i) except + - - string_c parse_robject_class_name(uintptr_t ptr) nogil except + - string_c parse_robject_package_name(uintptr_t ptr) nogil except + - - pair[int, int] parse_robject_dimensions(uintptr_t ptr) nogil except + diff --git a/src/rds2py/lib/parser.pyx b/src/rds2py/lib/parser.pyx deleted file mode 100644 index 467404f..0000000 --- a/src/rds2py/lib/parser.pyx +++ /dev/null @@ -1,168 +0,0 @@ -# pretty basic Cython wrapper - -from parser cimport ( - py_parser_rds_file, py_parser_extract_robject, - py_robject_extract_type, py_robject_extract_size, - parse_robject_int_vector, - parse_robject_string_vector, - parse_robject_attribute_names, - parse_robject_find_attribute, - parse_robject_load_attribute_by_index, - parse_robject_load_attribute_by_name, - parse_robject_load_vec_element, - parse_robject_class_name, - parse_robject_package_name, - parse_robject_dimensions -) -from libc.stdint cimport uintptr_t -from libcpp.string cimport string as string_c -from libcpp.vector cimport vector -from cython cimport view - -cimport numpy as np -import numpy as np - -cdef class PyParsedObject: - cdef uintptr_t ptr - - def __cinit__(self, file): - self.ptr = py_parser_rds_file(file.encode()) - - def get_robject(self): - cdef uintptr_t tmp = py_parser_extract_robject(self.ptr) - return PyRObject(tmp) - -cdef _map_ptr_to_view(uintptr_t ptr, shape, itemsize, format_type): - cdef view.array my_array = view.array(shape=shape, itemsize=itemsize, format=format_type) - my_array.data = ptr - return np.asarray(my_array) - -cdef class PyRObject: - cdef uintptr_t ptr - cdef string_c rtype - cdef int rsize - R_MIN = -2147483648 - - def __cinit__(self, p:uintptr_t): - self.ptr = p - self.get_rtype() - self.get_rsize() - - def get_rtype(self): - if not hasattr(self, "rtype"): - self.rtype = py_robject_extract_type(self.ptr) - return self.rtype - - def get_rsize(self): - if not hasattr(self, "rsize"): - self.rsize = py_robject_extract_size(self.ptr) - return self.rsize - - def shennanigans_to_py_reprs(self, result): - if result is None: - return result - - if self.rtype.decode() in ["integer"]: - if self.rsize == 2 and result["data"][0] == self.R_MIN and result["data"][1] < 0: - result["data"] = range(result["data"][1] * -1) - - return result - - def realize_value(self): - result = {} - if self.rtype.decode() in ["integer", "boolean"]: - result["data"] = self._get_int_or_bool_arr() - result["attributes"] = self.realize_attr_value() - elif self.rtype.decode('UTF-8') in ["double"]: - result["data"] = self._get_double_arr() - result["attributes"] = self.realize_attr_value() - elif self.rtype.decode('UTF-8') in ["string"]: - result["data"] = [s.decode() for s in self._get_string_arr()] - elif self.rtype.decode('UTF-8') in ["vector"]: - result["data"] = self._get_vector_arr() - result["attributes"] = self.realize_attr_value() - elif self.rtype.decode('UTF-8') in ["null"]: - return None - elif self.rtype.decode('UTF-8') in ["S4"]: - result = { - "data": None, - "package_name": self.get_package_name(), - "class_name": self.get_class_name() - } - result["attributes"] = self.realize_attr_value() - else: - return { - "data": None, - "attributes": None - } - # raise Exception(f'Cannot realize {self.rtype.decode()}') - - return self.shennanigans_to_py_reprs(result) - - def _get_vector_arr(self): - vec = [] - for i in range(self.rsize): - v_obj = self.load_vec_element(i) - v_obj_val = v_obj.realize_value() - vec.append(v_obj_val) - - return vec - - def _get_int_or_bool_arr(self): - if self.rsize == 0: - return np.empty(shape=(self.rsize,), dtype=int) - cdef uintptr_t arr_ptr = parse_robject_int_vector(self.ptr) - return _map_ptr_to_view(arr_ptr, shape=(self.rsize,), itemsize=sizeof(int), format_type="i") - - def _get_double_arr(self): - if self.rsize == 0: - return np.empty(shape=(self.rsize,), dtype="f8") - cdef uintptr_t arr_ptr = parse_robject_int_vector(self.ptr) - return _map_ptr_to_view(arr_ptr, shape=(self.rsize,), itemsize=sizeof(double), format_type="d") - - def _get_string_arr(self): - cdef vector[string_c] arr_str = parse_robject_string_vector(self.ptr) - return arr_str - - def get_attribute_names(self): - cdef vector[string_c] arr_str = parse_robject_attribute_names(self.ptr) - return arr_str - - def find_attribute(self, name): - return parse_robject_find_attribute(self.ptr, name.encode()) - - def load_attribute_by_index(self, index): - cdef uintptr_t tmp = parse_robject_load_attribute_by_index(self.ptr, index) - return PyRObject(tmp) - - def load_attribute_by_name(self, name): - cdef uintptr_t tmp = parse_robject_load_attribute_by_name(self.ptr, name.encode()) - return PyRObject(tmp) - - def load_vec_element(self, i): - cdef uintptr_t tmp = parse_robject_load_vec_element(self.ptr, i) - return PyRObject(tmp) - - def get_package_name(self): - if self.rtype.decode() == "S4": - return parse_robject_package_name(self.ptr).decode() - - raise Exception(f'package name does not exist on non-S4 classes') - - def get_class_name(self): - if self.rtype.decode() == "S4": - return parse_robject_class_name(self.ptr).decode() - - raise Exception(f'class name does not exist on non-S4 classes') - - def get_dimensions(self): - return parse_robject_dimensions(self.ptr) - - def realize_attr_value(self): - result = {} - - for ro_attr in self.get_attribute_names(): - tmp_obj = self.load_attribute_by_name(ro_attr.decode()) - result[ro_attr.decode()] = tmp_obj.realize_value() - - return result diff --git a/src/rds2py/lib/rds_parser.cpp b/src/rds2py/lib/rds_parser.cpp deleted file mode 100644 index 7537d1a..0000000 --- a/src/rds2py/lib/rds_parser.cpp +++ /dev/null @@ -1,266 +0,0 @@ -#include "rds2cpp/rds2cpp.hpp" -#include - -// Interface methods to Parser Object - -inline uintptr_t py_parser_rds_file(std::string file) { - rds2cpp::Parsed res = rds2cpp::parse_rds(file); - - return reinterpret_cast(new rds2cpp::Parsed(std::move(res))); -} - -inline uintptr_t py_parser_extract_robject(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - return reinterpret_cast(parsed->object.get()); -} - -// probably don't need this, mostly for testing -inline void py_read_parsed_ptr(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); -} - -// Interface Methods to RObject - -inline std::string py_robject_extract_type(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return "integer"; - case rds2cpp::SEXPType::REAL: - return "double"; - case rds2cpp::SEXPType::STR: - return "string"; - case rds2cpp::SEXPType::LGL: - return "boolean"; - case rds2cpp::SEXPType::VEC: - return "vector"; - case rds2cpp::SEXPType::S4: - return "S4"; - case rds2cpp::SEXPType::NIL: - return "null"; - default: - break; - } - return "other"; -} - -template -int _size_(const rds2cpp::RObject* ptr) { - auto xptr = static_cast(ptr); - return xptr->data.size(); -} - -inline int py_robject_extract_size(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return _size_(parsed); - case rds2cpp::SEXPType::REAL: - return _size_(parsed); - case rds2cpp::SEXPType::STR: - return _size_(parsed); - case rds2cpp::SEXPType::LGL: - return _size_(parsed); - case rds2cpp::SEXPType::VEC: - return _size_(parsed); - default: - break; - } - return -1; -} - -template -uintptr_t _get_vector_ptr(const rds2cpp::RObject* ptr) { - auto xptr = static_cast(ptr); - return reinterpret_cast(xptr->data.data()); -} - -inline uintptr_t parse_robject_int_vector(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return _get_vector_ptr(parsed); - case rds2cpp::SEXPType::LGL: - return _get_vector_ptr(parsed); - case rds2cpp::SEXPType::REAL: - return _get_vector_ptr(parsed); - default: - break; - } - throw std::runtime_error("cannot obtain numeric values for non-numeric RObject type"); - return _get_vector_ptr(parsed); // avoid compiler warning. -} - -// inline uintptr_t parse_robject_double_vector(uintptr_t ptr) { -// auto parsed = reinterpret_cast(ptr); -// switch (parsed->type()) { -// case rds2cpp::SEXPType::REAL: -// return _get_vector_ptr(parsed); -// default: -// break; -// } -// throw std::runtime_error("cannot obtain numeric values for non-numeric RObject type"); -// return _get_vector_ptr(parsed); // avoid compiler warning. -// } - -inline std::vector parse_robject_string_vector(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - if (parsed->type() != rds2cpp::SEXPType::STR) { - throw std::runtime_error("cannot return string values for non-string RObject type"); - } - auto sptr = static_cast(parsed); - - return sptr->data; -} - -template -const rds2cpp::Attributes& _get_attr_ptr(const rds2cpp::RObject* ptr) { - auto aptr = static_cast(ptr); - return aptr->attributes; -} - -inline std::vector parse_robject_attribute_names(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return _get_attr_ptr(parsed).names; - break; - case rds2cpp::SEXPType::REAL: - return _get_attr_ptr(parsed).names; - break; - case rds2cpp::SEXPType::LGL: - return _get_attr_ptr(parsed).names; - break; - case rds2cpp::SEXPType::VEC: - return _get_attr_ptr(parsed).names; - break; - case rds2cpp::SEXPType::S4: - return _get_attr_ptr(parsed).names; - break; - default: - break; - } - return _get_attr_ptr(parsed).names; // avoid compiler warning. -} - -template -int _contains_attr_(const rds2cpp::RObject* ptr, const std::string& name) { - auto aptr = static_cast(ptr); - const auto& attr_names = aptr->attributes.names; - - for (size_t i = 0; i < attr_names.size(); ++i) { - if (attr_names[i] == name) { - return i; - } - } - - return -1; -} - -inline int parse_robject_find_attribute(uintptr_t ptr, std::string name) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return _contains_attr_(parsed, name); - case rds2cpp::SEXPType::REAL: - return _contains_attr_(parsed, name); - case rds2cpp::SEXPType::LGL: - return _contains_attr_(parsed, name); - case rds2cpp::SEXPType::STR: - return _contains_attr_(parsed, name); - case rds2cpp::SEXPType::VEC: - return _contains_attr_(parsed, name); - case rds2cpp::SEXPType::S4: - return _contains_attr_(parsed, name); - default: - break; - } - return -1; -} - -template -uintptr_t _load_attr_idx_(const rds2cpp::RObject* ptr, int i) { - auto aptr = static_cast(ptr); - if (static_cast(i) >= aptr->attributes.values.size()) { - throw std::runtime_error("requested attribute index " + std::to_string(i) + " is out of range"); - } - const auto& chosen = aptr->attributes.values[i]; - return reinterpret_cast(chosen.get()); -} - -inline uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr, int i) { - auto parsed = reinterpret_cast(ptr); - switch (parsed->type()) { - case rds2cpp::SEXPType::INT: - return _load_attr_idx_(parsed, i); - case rds2cpp::SEXPType::REAL: - return _load_attr_idx_(parsed, i); - case rds2cpp::SEXPType::LGL: - return _load_attr_idx_(parsed, i); - case rds2cpp::SEXPType::STR: - return _load_attr_idx_(parsed, i); - case rds2cpp::SEXPType::VEC: - return _load_attr_idx_(parsed, i); - case rds2cpp::SEXPType::S4: - return _load_attr_idx_(parsed, i); - default: - break; - } - - throw std::runtime_error("unsupported R object type"); - return _load_attr_idx_(parsed, i); // avoid compiler warnings. -} - -inline uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr, std::string name) { - auto parsed = reinterpret_cast(ptr); - int idx = parse_robject_find_attribute(ptr, name); - if (idx < 0) { - throw std::runtime_error("no attribute named '" + name + "'"); - } - return parse_robject_load_attribute_by_index(ptr, idx); -} - -inline uintptr_t parse_robject_load_vec_element(uintptr_t ptr, int i) { - auto parsed = reinterpret_cast(ptr); - if (parsed->type() != rds2cpp::SEXPType::VEC) { - throw std::runtime_error("cannot return list element for non-list R object"); - } - auto lptr = static_cast(parsed); - return reinterpret_cast(lptr->data[i].get()); -} - -inline std::string parse_robject_class_name(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - if (parsed->type() != rds2cpp::SEXPType::S4) { - throw std::runtime_error("cannot return class name for non-S4 R object"); - } - auto sptr = static_cast(parsed); - return sptr->class_name; -} - -inline std::string parse_robject_package_name(uintptr_t ptr) { - auto parsed = reinterpret_cast(ptr); - if (parsed->type() != rds2cpp::SEXPType::S4) { - throw std::runtime_error("cannot return class name for non-S4 R object"); - } - auto sptr = static_cast(parsed); - return sptr->package_name; -} - -inline std::pair parse_robject_dimensions(uintptr_t ptr) { - auto dimobj = reinterpret_cast(ptr); - if (dimobj->type() != rds2cpp::SEXPType::INT) { - throw std::runtime_error("expected matrix dimensions to be integer"); - } - - auto dimvec = static_cast(dimobj); - const auto& dims = dimvec->data; - if (dims.size() != 2) { - throw std::runtime_error("expected matrix dimensions to be of length 2"); - } - if (dims[0] < 0 || dims[1] < 0) { - throw std::runtime_error("expected all matrix dimensions to be non-negative"); - } - - return std::pair(dims[0], dims[1]); -} diff --git a/src/rds2py/parser.py b/src/rds2py/parser.py index bbcfa29..e8aad9c 100644 --- a/src/rds2py/parser.py +++ b/src/rds2py/parser.py @@ -1,6 +1,6 @@ from typing import Dict, MutableMapping -from .core import PyParsedObject +from .PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -16,9 +16,8 @@ def read_rds(file: str) -> Dict: Returns: MutableMapping: R object as a python dictionary. """ - parsed_obj = PyParsedObject(file) - robject_obj = parsed_obj.get_robject() - realized = robject_obj.realize_value() + parsed_obj = PyRdsParser(file) + realized = parsed_obj.parse() return realized diff --git a/test.py b/test.py deleted file mode 100644 index f3d4aca..0000000 --- a/test.py +++ /dev/null @@ -1,36 +0,0 @@ -import rds2py - -# parsed_obj = rds2py.PyParsedObject("tests/data/s4_dense_matrix.rds") -robj = rds2py.read_rds("tests/test.rds") -# print(parsed_obj) - -# robject_obj = parsed_obj.get_robject() -# # print(robject_obj) - -# # print(robject_obj.get_rtype()) - -# actual_arrau = robject_obj.realize_value() -# # print(actual_arrau) - -# # mat = rds2py.as_dense_matrix(actual_arrau) -# # print(mat) - -# # print("############") -# # print(actual_arrau["class_name"]) -# # print(robject_obj) - -# # print(actual_arrau["class_name"]) -# # print(robject_obj) - -sce = rds2py.as_summarized_experiment(robj) -print(sce) - -# actual_arrau_names = robject_obj.get_attribute_names() -# print(actual_arrau_names) - - -# attr_values = robject_obj.realize_attr_value() -# print(attr_values) - -# dims = robject_obj.get_dimensions() -# print(dims) diff --git a/tests/test_atomic-attr.py b/tests/test_atomic-attr.py index 3a065df..95bb708 100644 --- a/tests/test_atomic-attr.py +++ b/tests/test_atomic-attr.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,13 +8,11 @@ def test_read_atomic_attrs(): - parsed_obj = PyParsedObject("tests/data/atomic_attr.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() - attr_names = robject_obj.get_attribute_names() - attr_values = robject_obj.realize_attr_value() + parsed_obj = PyRdsParser("tests/data/atomic_attr.rds") + data = parsed_obj.parse() + print(data) - assert array is not None - assert len(array) > 0 - assert len(attr_names) is not None - assert len(attr_values) is not None + assert data is not None + assert len(data["data"]) > 0 + assert len(data["attributes"]) > 0 + assert len(data["attributes"]["names"]["data"]) == 1000 diff --git a/tests/test_atomic-bool.py b/tests/test_atomic-bool.py index 02bfa78..e57fe27 100644 --- a/tests/test_atomic-bool.py +++ b/tests/test_atomic-bool.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,18 +8,16 @@ def test_read_atomic_logical(): - parsed_obj = PyParsedObject("tests/data/atomic_logical.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_logical.rds") + array = parsed_obj.parse() assert array is not None assert array["data"].shape[0] > 0 def test_read_atomic_logical_na(): - parsed_obj = PyParsedObject("tests/data/atomic_logical_wNA.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_logical_wNA.rds") + array = parsed_obj.parse() assert array is not None assert array["data"].shape[0] > 0 diff --git a/tests/test_atomic-double.py b/tests/test_atomic-double.py index b731cb1..f92f620 100644 --- a/tests/test_atomic-double.py +++ b/tests/test_atomic-double.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,9 +8,9 @@ def test_read_atomic_double(): - parsed_obj = PyParsedObject("tests/data/atomic_double.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_double.rds") + array = parsed_obj.parse() assert array is not None + print(array) assert array["data"].shape[0] == 99 diff --git a/tests/test_atomic-int.py b/tests/test_atomic-int.py index 0116370..c10c7a6 100644 --- a/tests/test_atomic-int.py +++ b/tests/test_atomic-int.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,9 +8,9 @@ def test_read_atomic_ints(): - parsed_obj = PyParsedObject("tests/data/atomic_ints.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_ints.rds") + array = parsed_obj.parse() assert array is not None + print(array) assert array["data"].shape[0] == 112 diff --git a/tests/test_atomic-str.py b/tests/test_atomic-str.py index 44e96d0..0b4d062 100644 --- a/tests/test_atomic-str.py +++ b/tests/test_atomic-str.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,18 +8,16 @@ def test_read_atomic_chars(): - parsed_obj = PyParsedObject("tests/data/atomic_chars.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_chars.rds") + array = parsed_obj.parse() assert array is not None assert len(array["data"]) == 26 def test_read_atomic_chars_unicode(): - parsed_obj = PyParsedObject("tests/data/atomic_chars_unicode.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/atomic_chars_unicode.rds") + array = parsed_obj.parse() assert array is not None assert len(array["data"]) == 4 diff --git a/tests/test_list.py b/tests/test_list.py index 7e8724c..fa5b012 100644 --- a/tests/test_list.py +++ b/tests/test_list.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,45 +8,40 @@ def test_read_atomic_lists(): - parsed_obj = PyParsedObject("tests/data/lists.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/lists.rds") + array = parsed_obj.parse() assert array is not None assert len(array) > 0 def test_read_atomic_lists_nested(): - parsed_obj = PyParsedObject("tests/data/lists_nested.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/lists_nested.rds") + array = parsed_obj.parse() assert array is not None assert len(array) > 0 def test_read_atomic_lists_nested_deep(): - parsed_obj = PyParsedObject("tests/data/lists_nested_deep.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/lists_nested_deep.rds") + array = parsed_obj.parse() assert array is not None assert len(array) > 0 def test_read_atomic_lists_df(): - parsed_obj = PyParsedObject("tests/data/lists_df.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/lists_df.rds") + array = parsed_obj.parse() assert array is not None assert len(array) > 0 def test_read_atomic_lists_nested_deep_rownames(): - parsed_obj = PyParsedObject("tests/data/lists_df_rownames.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/lists_df_rownames.rds") + array = parsed_obj.parse() assert array is not None assert len(array) > 0 diff --git a/tests/test_s4.py b/tests/test_s4.py index 26676c6..1dd7bda 100644 --- a/tests/test_s4.py +++ b/tests/test_s4.py @@ -1,6 +1,6 @@ import pytest -from rds2py.core import PyParsedObject +from rds2py.PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -8,24 +8,21 @@ def test_read_s4_class(): - parsed_obj = PyParsedObject("tests/data/s4_class.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/s4_class.rds") + robject_obj = parsed_obj.parse() - assert array is not None + assert robject_obj is not None def test_read_s4_matrix(): - parsed_obj = PyParsedObject("tests/data/s4_matrix.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/s4_matrix.rds") + robject_obj = parsed_obj.parse() - assert array is not None + assert robject_obj is not None def test_read_s4_matrix_dgt(): - parsed_obj = PyParsedObject("tests/data/s4_matrix_dgt.rds") - robject_obj = parsed_obj.get_robject() - array = robject_obj.realize_value() + parsed_obj = PyRdsParser("tests/data/s4_matrix_dgt.rds") + robject_obj = parsed_obj.parse() - assert array is not None + assert robject_obj is not None