diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index 9c4f5c1f..6bc5f738 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -6,5 +6,3 @@ labels: question assignees: '' --- - - diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 00000000..d3aaf49b --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,55 @@ +name: Publish Python distributions to PyPI + +on: + push: + branches: + - 'master' + - 'test_deploy' + tags: + - '*' + +jobs: + build-n-publish: + name: Build and publish Python distributions to PyPI + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@master + + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + + - name: Install pypa/build + run: >- + cd python && + python -m + pip install + build + --user && + cd .. + + - name: Build a binary wheel and a source tarball + run: >- + cd python && + python -m + build + --sdist + --wheel + --outdir dist/ + . && + cd .. 
+ + - name: Publish distribution to Test PyPI + uses: pypa/gh-action-pypi-publish@master + with: + skip_existing: true + password: ${{ secrets.test_pypi_password }} + repository_url: https://test.pypi.org/legacy/ + + - name: Publish distribution to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..ad5b9838 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,94 @@ +name: pre-commit +on: + push: + branches-ignore: + - 'master' + +jobs: + pre-commit: + runs-on: ubuntu-latest + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Set up environment + run: | + echo "UBUNTU_VERSION=`grep DISTRIB_RELEASE /etc/lsb-release | sed 's/.*=//g'`" >> $GITHUB_ENV + mkdir -p .local/R/site-packages + echo "R_LIBS_USER=`pwd`/.local/R/site-packages" >> $GITHUB_ENV + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + sudo apt-get update -qq + sudo apt-get install -y libcurl4-openssl-dev + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + architecture: "x64" + + - name: Cache pre-commit + uses: actions/cache@v2 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}- + + - name: Run pre-commit + uses: pre-commit/action@v2.0.0 + + - name: Cache R packages + uses: actions/cache@v2 + if: startsWith(runner.os, 'Linux') + with: + path: ${{env.R_LIBS_USER}} + key: precommit-${{env.UBUNTU_VERSION}}-renv-${{ hashFiles('Rmagic/.pre-commit.r_requirements.txt') }}-${{ hashFiles('Rmagic/DESCRIPTION') }}- + restore-keys: | + precommit-${{env.UBUNTU_VERSION}}-renv-${{ hashFiles('Rmagic/.pre-commit.r_requirements.txt') }}- + 
precommit-${{env.UBUNTU_VERSION}}-renv- + + - name: Install R packages + run: | + if (!requireNamespace("renv", quietly = TRUE)) install.packages("renv") + con = file("Rmagic/.pre-commit.r_requirements.txt", "r") + while ( length(pkg <- readLines(con, n = 1)) > 0 ) { + renv::install(pkg) + } + close(con) + if (!require("devtools")) install.packages("devtools", repos="http://cloud.r-project.org") + devtools::install_dev_deps("./Rmagic", upgrade=TRUE) + devtools::install("./Rmagic") + shell: Rscript {0} + + - name: Run pre-commit for R + run: | + cd Rmagic + git init + git add * + pre-commit run --all-files + rm -rf .git + cd .. + + - name: Commit files + if: failure() + run: | + git checkout -- .github/workflows + if [[ `git status --porcelain --untracked-files=no` ]]; then + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git commit -m "pre-commit" -a + fi + + - name: Push changes + if: failure() + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 00000000..9e1bfcc2 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,172 @@ +name: Unit Tests + +on: + push: + branches-ignore: + - 'test_deploy' + pull_request: + branches: + - '*' + +jobs: + + test_python: + runs-on: ${{ matrix.config.os }} + if: "!contains(github.event.head_commit.message, 'ci skip')" + + strategy: + fail-fast: false + matrix: + config: + - {name: '3.9', os: ubuntu-latest, python: '3.9' } + - {name: '3.8', os: ubuntu-latest, python: '3.8' } + - {name: '3.7', os: ubuntu-latest, python: '3.7' } + - {name: '3.6', os: ubuntu-latest, python: '3.6' } + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + + - name: Check Ubuntu version + run: | + echo 
"UBUNTU_VERSION=`grep DISTRIB_RELEASE /etc/lsb-release | sed 's/.*=//g'`" >> $GITHUB_ENV + + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.config.python }} + + - name: Cache Python packages + uses: actions/cache@v2 + with: + path: ${{ env.pythonLocation }} + key: ${{runner.os}}-pip-${{ env.pythonLocation }}-${{ hashFiles('python/setup.py') }} + restore-keys: ${{runner.os}}-pip-${{ env.pythonLocation }}- + + - name: Install package & dependencies + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U ./python[test] + python -c "import magic" + + - name: Run Python tests + run: | + cd python + nose2 -vvv + cd .. + + - name: Build docs + run: | + cd python + pip install .[doc] + cd doc + make html + cd ../.. + + - name: Coveralls + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COVERALLS_SERVICE_NAME: github + run: | + coveralls + + - name: Upload check results on fail + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ matrix.config.name }}_results + path: check + + test_r: + runs-on: ${{ matrix.config.os }} + if: "!contains(github.event.head_commit.message, 'ci skip')" + + strategy: + fail-fast: false + matrix: + config: + - {name: 'devel', os: ubuntu-latest, r: 'devel' } + - {name: 'release', os: ubuntu-latest, r: 'release' } + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + + - name: Set up environment + run: | + echo "UBUNTU_VERSION=`grep DISTRIB_RELEASE /etc/lsb-release | sed 's/.*=//g'`" >> $GITHUB_ENV + mkdir -p .local/R/site-packages + echo "R_LIBS_USER=`pwd`/.local/R/site-packages" >> $GITHUB_ENV + + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + sudo apt-get update -qq + sudo apt-get 
install -y libcurl4-openssl-dev pandoc + + - name: Cache Python packages + uses: actions/cache@v2 + with: + path: ${{ env.pythonLocation }} + key: ${{runner.os}}-pip-${{ env.pythonLocation }}-${{ hashFiles('python/setup.py') }} + restore-keys: ${{runner.os}}-pip-${{ env.pythonLocation }}- + + - name: Install package & dependencies + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U ./python + python -c "import magic" + + - name: Set up R + id: setup-r + uses: r-lib/actions/setup-r@v1 + with: + r-version: ${{ matrix.config.r }} + + - name: Cache R packages + uses: actions/cache@v2 + if: startsWith(runner.os, 'Linux') + with: + path: ${{env.R_LIBS_USER}} + key: test-${{env.UBUNTU_VERSION}}-renv-${{ steps.setup-r.outputs.installed-r-version }}-${{ hashFiles('Rmagic/DESCRIPTION') }}- + restore-keys: | + test-${{env.UBUNTU_VERSION}}-renv-${{ steps.setup-r.outputs.installed-r-version }}- + + - name: Install R packages + run: | + if (!require("devtools")) install.packages("devtools", repos="http://cloud.r-project.org") + devtools::install_dev_deps("./Rmagic", upgrade=TRUE) + devtools::install("./Rmagic") + shell: Rscript {0} + + - name: Install tinytex + uses: r-lib/actions/setup-tinytex@v1 + + - name: Run R tests + run: | + cd Rmagic + R CMD build . + R CMD check --as-cran *.tar.gz + cd .. 
+ + - name: Upload check results on fail + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ matrix.config.name }}_results + path: check diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..d155f0bf --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: \.(ai|gz)$ + - repo: https://github.com/timothycrosley/isort + rev: 5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + args: ['--target-version', 'py36'] + - repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.5.4 + hooks: + - id: autopep8 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + additional_dependencies: ['hacking'] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index cfba9b64..00000000 --- a/.travis.yml +++ /dev/null @@ -1,68 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: r -sudo: required - -os: - - linux - -dist: - - xenial - -r: - - oldrel - - release - - devel - -addons: - apt: - sources: - - deadsnakes - packages: - - libhdf5-dev - - python3.6-dev - -cache: - - packages - - apt - - directories: - - $HOME/.cache/pip - -before_install: - - curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py - - sudo python3.6 get-pip.py - - mkdir -p ~/bin && export PATH=~/bin/:$PATH - - ln -s $(which python3.6) ~/bin/python - - ln -s $(which pip3.6) ~/bin/pip - - sudo pip install --upgrade pip - - pip install --user -q phate - -install: - - cd python; pip install --user -q . 
- - cd ../Rmagic; R -e 'install.packages("devtools", repos="http://cloud.r-project.org")' - - R -e 'install.packages("BiocManager", repos="http://cloud.r-project.org"); BiocManager::install("multtest")' - - R -e 'devtools::install_deps(dep = T, upgrade="always")' - - cd .. - -script: - - python -c "import magic" - - cd Rmagic; R CMD build . - - R CMD check *tar.gz - - cd ../python; pip install --user -q .[test] - - if [ "$TRAVIS_PYTHON_VERSION" != "3.5" ]; then black . --check --diff -t py35; fi - - python setup.py test - - pip install --user -q .[doc] - - cd doc; make html; cd .. - -deploy: - provider: pypi - user: scottgigante - password: ${PYPI_PASSWORD} - distributions: sdist bdist_wheel - skip_existing: true - skip_cleanup: true - on: - tags: true - branch: master - -warnings_are_errors: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 17c75d4b..11bfb4a2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Code Style and Testing Contributors are encouraged to write tests for their code, but if you do not know how to do so, please do not feel discouraged from contributing code! Others can always help you test your contribution. -Python code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage). To automatically reformat your code when you run `git commit`, you can run `./autoblack.sh` in the root directory of this project to add a hook to your `git` repository. +Code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage) and [OpenStack](https://docs.openstack.org/hacking/latest/user/hacking.html#styleguide). Styling is automatically applied by [`pre-commit`](https://github.com/pre-commit/pre-commit). Code of Conduct --------------- diff --git a/LICENSE b/LICENSE index 8cdb8451..23cb7903 100644 --- a/LICENSE +++ b/LICENSE @@ -337,4 +337,3 @@ proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. - diff --git a/README.md b/README.md index c4225ba6..801c1d6b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Markov Affinity-based Graph Imputation of Cells (MAGIC) [![Latest PyPI version](https://img.shields.io/pypi/v/magic-impute.svg)](https://pypi.org/project/magic-impute/) [![Latest CRAN version](https://img.shields.io/cran/v/Rmagic.svg)](https://cran.r-project.org/package=Rmagic) -[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/MAGIC.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/MAGIC) +[![GitHub Actions Build](https://img.shields.io/github/workflow/status/KrishnaswamyLab/MAGIC/Unit%20Tests/master?label=Github%20Actions)](https://github.com/KrishnaswamyLab/MAGIC/actions) [![Read the Docs](https://img.shields.io/readthedocs/magic.svg)](https://magic.readthedocs.io/) [![Cell Publication DOI](https://zenodo.org/badge/DOI/10.1016/j.cell.2018.05.061.svg)](https://www.cell.com/cell/abstract/S0092-8674(18)30724-4) [![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab) @@ -17,13 +17,13 @@ To see how MAGIC can be applied to single-cell RNA-seq, elucidating the epitheli MAGIC has been implemented in Python, Matlab, and R. 
-#### To get started immediately, check out our tutorials: -##### Python -* [Epithelial-to-Mesenchymal Transition Tutorial](http://nbviewer.jupyter.org/github/KrishnaswamyLab/MAGIC/blob/master/python/tutorial_notebooks/emt_tutorial.ipynb) -* [Bone Marrow Tutorial](http://nbviewer.jupyter.org/github/KrishnaswamyLab/MAGIC/blob/master/python/tutorial_notebooks/bonemarrow_tutorial.ipynb) -##### R -* [Epithelial-to-Mesenchymal Transition Tutorial](http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/emt_tutorial.html) -* [Bone Marrow Tutorial](http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/bonemarrow_tutorial.html) +#### To get started immediately, check out our tutorials: +##### Python +* [Epithelial-to-Mesenchymal Transition Tutorial](http://nbviewer.jupyter.org/github/KrishnaswamyLab/MAGIC/blob/master/python/tutorial_notebooks/emt_tutorial.ipynb) +* [Bone Marrow Tutorial](http://nbviewer.jupyter.org/github/KrishnaswamyLab/MAGIC/blob/master/python/tutorial_notebooks/bonemarrow_tutorial.ipynb) +##### R +* [Epithelial-to-Mesenchymal Transition Tutorial](http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/emt_tutorial.html) +* [Bone Marrow Tutorial](http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/bonemarrow_tutorial.html)

diff --git a/Rmagic/.Rbuildignore b/Rmagic/.Rbuildignore index b0dc269e..685fd5c5 100644 --- a/Rmagic/.Rbuildignore +++ b/Rmagic/.Rbuildignore @@ -1,3 +1,4 @@ ^data-raw$ ^tests$ ^README\.Rmd$ +^.pre\-commit.*$ diff --git a/Rmagic/.pre-commit-config.yaml b/Rmagic/.pre-commit-config.yaml new file mode 100644 index 00000000..3aa91165 --- /dev/null +++ b/Rmagic/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: \.(ai|gz)$ + - repo: https://github.com/lorenzwalthert/precommit + rev: v0.1.3 + hooks: + - id: parsable-R + - id: no-browser-statement + - id: readme-rmd-rendered + - id: deps-in-desc + exclude: data\-raw + - id: use-tidy-description + - id: style-files + - id: lintr + args: [--warn_only] + verbose: true + - id: roxygenize diff --git a/Rmagic/.pre-commit.r_requirements.txt b/Rmagic/.pre-commit.r_requirements.txt new file mode 100644 index 00000000..79921314 --- /dev/null +++ b/Rmagic/.pre-commit.r_requirements.txt @@ -0,0 +1,6 @@ +docopt +styler +git2r +lintr +roxygen2 +precommit diff --git a/Rmagic/DESCRIPTION b/Rmagic/DESCRIPTION index 8ca343f3..bca3bb97 100644 --- a/Rmagic/DESCRIPTION +++ b/Rmagic/DESCRIPTION @@ -1,26 +1,39 @@ -Package: Rmagic Type: Package +Package: Rmagic Title: MAGIC - Markov Affinity-Based Graph Imputation of Cells Version: 2.0.3.999 -Authors@R: c(person(given = "David", family = "van Dijk", email = "davidvandijk@gmail.com", role = c("aut")), - person(given = 'Scott', family = 'Gigante', email = 'scott.gigante@yale.edu', role = 'cre', - comment = c(ORCID = '0000-0002-4544-2764'))) +Authors@R: + c(person(given = "David", + family = "van Dijk", + role = "aut", + email = "davidvandijk@gmail.com"), + person(given = "Scott", + family = "Gigante", + role = "cre", + email = "scott.gigante@yale.edu", + comment = c(ORCID = "0000-0002-4544-2764"))) Maintainer: Scott Gigante -Description: 
MAGIC (Markov affinity-based graph imputation of cells) is a method for addressing technical noise in single-cell data, including under-sampling of mRNA molecules, often termed "dropout" which can severely obscure important gene-gene relationships. MAGIC shares information across similar cells, via data diffusion, to denoise the cell count matrix and fill in missing transcripts. Read more: van Dijk et al. (2018) . +Description: MAGIC (Markov affinity-based graph imputation of cells) is a + method for addressing technical noise in single-cell data, including + under-sampling of mRNA molecules, often termed "dropout" which can + severely obscure important gene-gene relationships. MAGIC shares + information across similar cells, via data diffusion, to denoise the + cell count matrix and fill in missing transcripts. Read more: van Dijk + et al. (2018) . +License: GPL-2 | file LICENSE Depends: - R (>= 3.3), - Matrix (>= 1.2-0) + Matrix (>= 1.2-0), + R (>= 3.3) Imports: + ggplot2, methods, - stats, reticulate (>= 1.4), - ggplot2 + stats Suggests: - Seurat (>= 3.0.0), + phateR, readr, - viridis, - phateR -License: GPL-2 | file LICENSE -LazyData: true -RoxygenNote: 7.0.2 + Seurat (>= 3.0.0), + viridis Encoding: UTF-8 +LazyData: true +RoxygenNote: 7.1.1 diff --git a/Rmagic/LICENSE b/Rmagic/LICENSE index 8cdb8451..23cb7903 100644 --- a/Rmagic/LICENSE +++ b/Rmagic/LICENSE @@ -337,4 +337,3 @@ proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
- diff --git a/Rmagic/R/magic.R b/Rmagic/R/magic.R index e5ff0d90..03374ca4 100644 --- a/Rmagic/R/magic.R +++ b/Rmagic/R/magic.R @@ -55,82 +55,79 @@ #' #' @examples #' if (pymagic_is_available()) { -#' -#' data(magic_testdata) -#' -#' # Run MAGIC -#' data_magic <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) -#' summary(data_magic) -#' ## CDH1 VIM ZEB1 -#' ## Min. :0.4303 Min. :3.854 Min. :0.01111 -#' ## 1st Qu.:0.4444 1st Qu.:3.947 1st Qu.:0.01145 -#' ## Median :0.4462 Median :3.964 Median :0.01153 -#' ## Mean :0.4461 Mean :3.965 Mean :0.01152 -#' ## 3rd Qu.:0.4478 3rd Qu.:3.982 3rd Qu.:0.01160 -#' ## Max. :0.4585 Max. :4.127 Max. :0.01201 -#' -#' # Plot the result with ggplot2 -#' if (require(ggplot2)) { -#' ggplot(data_magic) + -#' geom_point(aes(x=VIM, y=CDH1, color=ZEB1)) -#' } -#' -#' # Run MAGIC again returning all genes -#' # We use the last run as initialization -#' data_magic <- magic(magic_testdata, genes="all_genes", init=data_magic) -#' # Extract the smoothed data matrix to use in downstream analysis -#' data_smooth <- as.matrix(data_magic) -#' +#' data(magic_testdata) +#' +#' # Run MAGIC +#' data_magic <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) +#' summary(data_magic) +#' ## CDH1 VIM ZEB1 +#' ## Min. :0.4303 Min. :3.854 Min. :0.01111 +#' ## 1st Qu.:0.4444 1st Qu.:3.947 1st Qu.:0.01145 +#' ## Median :0.4462 Median :3.964 Median :0.01153 +#' ## Mean :0.4461 Mean :3.965 Mean :0.01152 +#' ## 3rd Qu.:0.4478 3rd Qu.:3.982 3rd Qu.:0.01160 +#' ## Max. :0.4585 Max. :4.127 Max. 
:0.01201 +#' +#' # Plot the result with ggplot2 +#' if (require(ggplot2)) { +#' ggplot(data_magic) + +#' geom_point(aes(x = VIM, y = CDH1, color = ZEB1)) +#' } +#' +#' # Run MAGIC again returning all genes +#' # We use the last run as initialization +#' data_magic <- magic(magic_testdata, genes = "all_genes", init = data_magic) +#' # Extract the smoothed data matrix to use in downstream analysis +#' data_smooth <- as.matrix(data_magic) #' } #' #' if (pymagic_is_available() && require(Seurat)) { +#' data(magic_testdata) #' -#' data(magic_testdata) +#' # Create a Seurat object +#' seurat_object <- CreateSeuratObject(counts = t(magic_testdata), assay = "RNA") +#' seurat_object <- NormalizeData(object = seurat_object) +#' seurat_object <- ScaleData(object = seurat_object) #' -#' # Create a Seurat object -#' seurat_object <- CreateSeuratObject(counts = t(magic_testdata), assay="RNA") -#' seurat_object <- NormalizeData(object = seurat_object) -#' seurat_object <- ScaleData(object = seurat_object) -#' -#' # Run MAGIC and reset the active assay -#' seurat_object <- magic(seurat_object) -#' seurat_object@active.assay = "MAGIC_RNA" -#' -#' # Analyze with Seurat -#' VlnPlot(seurat_object, features=c("VIM", "ZEB1", "CDH1")) +#' # Run MAGIC and reset the active assay +#' seurat_object <- magic(seurat_object) +#' seurat_object@active.assay <- "MAGIC_RNA" #' +#' # Analyze with Seurat +#' VlnPlot(seurat_object, features = c("VIM", "ZEB1", "CDH1")) #' } -#' #' @export #' magic <- function(data, ...) { - UseMethod(generic = 'magic', object = data) + UseMethod(generic = "magic", object = data) } #' @rdname magic #' @export #' magic.default <- function( - data, - genes = NULL, - knn = 5, - knn.max = NULL, - decay = 1, - t = 3, - npca = 100, - solver = 'exact', - init = NULL, - t.max = 20, - knn.dist.method = 'euclidean', - verbose = 1, - n.jobs = 1, - seed = NULL, - # deprecated args - k=NULL, alpha=NULL, - ... 
-) { + data, + genes = NULL, + knn = 5, + knn.max = NULL, + decay = 1, + t = 3, + npca = 100, + solver = "exact", + init = NULL, + t.max = 20, + knn.dist.method = "euclidean", + verbose = 1, + n.jobs = 1, + seed = NULL, + # deprecated args + k = NULL, alpha = NULL, + ...) { # check installation - if (!reticulate::py_module_available(module = "magic") || (is.null(pymagic))) load_pymagic() + if (!reticulate::py_module_available(module = "magic") || + (is.null(pymagic))) { + load_pymagic() + } # check for deprecated arguments if (!is.null(k)) { message("Argument k is deprecated. Using knn instead.") @@ -149,11 +146,11 @@ magic.default <- function( seed <- check.int.or.null(seed) verbose <- check.int.or.null(verbose) decay <- check.double.or.null(decay) - t <- check.int.or.string(t, 'auto') + t <- check.int.or.string(t, "auto") if (!methods::is(object = data, "Matrix")) { data <- as.matrix(x = data) } - if (is.null(x = genes) || is.na(x = genes)) { + if (length(genes) <= 1 && (is.null(x = genes) || is.na(x = genes))) { genes <- NULL gene_names <- colnames(x = data) } else if (is.numeric(x = genes)) { @@ -166,7 +163,12 @@ magic.default <- function( } else { # character vector if (!all(genes %in% colnames(x = data))) { - warning(paste0("Genes ", genes[!(genes %in% colnames(data))], " not found.", collapse = ", ")) + warning(paste0( + "Genes ", + genes[!(genes %in% colnames(data))], + " not found.", + collapse = ", " + )) } genes <- which(x = colnames(x = data) %in% genes) gene_names <- colnames(x = data)[genes] @@ -228,11 +230,11 @@ magic.default <- function( colnames(x = result) <- gene_names rownames(x = result) <- rownames(data) result <- as.data.frame(x = result) - result <- list( - "result" = result, - "operator" = operator, - "params" = params - ) + result <- list( + "result" = result, + "operator" = operator, + "params" = params + ) class(x = result) <- c("magic", "list") return(result) } @@ -242,22 +244,21 @@ magic.default <- function( #' @method magic seurat #' 
magic.seurat <- function( - data, - genes = NULL, - knn = 5, - knn.max = NULL, - decay = 1, - t = 3, - npca = 100, - solver = "exact", - init = NULL, - t.max = 20, - knn.dist.method = 'euclidean', - verbose = 1, - n.jobs = 1, - seed = NULL, - ... -) { + data, + genes = NULL, + knn = 5, + knn.max = NULL, + decay = 1, + t = 3, + npca = 100, + solver = "exact", + init = NULL, + t.max = 20, + knn.dist.method = "euclidean", + verbose = 1, + n.jobs = 1, + seed = NULL, + ...) { if (requireNamespace("Seurat", quietly = TRUE)) { results <- magic( data = as.matrix(x = t(x = data@data)), @@ -307,29 +308,28 @@ magic.seurat <- function( #' @method magic Seurat #' magic.Seurat <- function( - data, - assay = NULL, - genes = NULL, - knn = 5, - knn.max = NULL, - decay = 1, - t = 3, - npca = 100, - solver = 'exact', - init = NULL, - t.max = 20, - knn.dist.method = 'euclidean', - verbose = 1, - n.jobs = 1, - seed = NULL, - ... -) { + data, + assay = NULL, + genes = NULL, + knn = 5, + knn.max = NULL, + decay = 1, + t = 3, + npca = 100, + solver = "exact", + init = NULL, + t.max = 20, + knn.dist.method = "euclidean", + verbose = 1, + n.jobs = 1, + seed = NULL, + ...) { if (requireNamespace("Seurat", quietly = TRUE)) { if (is.null(x = assay)) { assay <- Seurat::DefaultAssay(object = data) } results <- magic( - data = t(x = Seurat::GetAssayData(object = data, slot = 'data', assay = assay)), + data = t(x = Seurat::GetAssayData(object = data, slot = "data", assay = assay)), genes = genes, knn = knn, knn.max = knn.max, @@ -345,11 +345,20 @@ magic.Seurat <- function( seed = seed, ... ) - assay_name <- paste0('MAGIC_', assay) - data[[assay_name]] <- Seurat::CreateAssayObject(data = t(x = as.matrix(x = results$result))) - print(paste0("Added MAGIC output to ", assay_name, ". 
To use it, pass assay='", assay_name, - "' to downstream methods or set DefaultAssay(seurat_object) <- '", assay_name, "'.")) - Seurat::Tool(object = data) <- results[c('operator', 'params')] + assay_name <- paste0("MAGIC_", assay) + data[[assay_name]] <- Seurat::CreateAssayObject( + data = t(x = as.matrix(x = results$result)) + ) + print(paste0( + "Added MAGIC output to ", + assay_name, + ". To use it, pass assay='", + assay_name, + "' to downstream methods or set DefaultAssay(seurat_object) <- '", + assay_name, + "'." + )) + Seurat::Tool(object = data) <- results[c("operator", "params")] return(data) } else { message("Seurat package not available. Running default MAGIC implementation.") @@ -380,26 +389,26 @@ magic.Seurat <- function( #' @param ... Arguments for print() #' @examples #' if (pymagic_is_available()) { -#' -#' data(magic_testdata) -#' data_magic <- magic(magic_testdata) -#' print(data_magic) -#' ## MAGIC with elements -#' ## $result : (500, 197) -#' ## $operator : Python MAGIC operator -#' ## $params : list with elements (data, knn, decay, t, npca, knn.dist.method) -#' +#' data(magic_testdata) +#' data_magic <- magic(magic_testdata) +#' print(data_magic) +#' ## MAGIC with elements +#' ## $result : (500, 197) +#' ## $operator : Python MAGIC operator +#' ## $params : list with elements (data, knn, decay, t, npca, knn.dist.method) #' } #' @rdname print #' @method print magic #' @export print.magic <- function(x, ...) { - result <- paste0("MAGIC with elements\n", - " $result : (", nrow(x$result), ", ", - ncol(x$result), ")\n", - " $operator : Python MAGIC operator\n", - " $params : list with elements (", - paste(names(x$params), collapse = ", "), ")") + result <- paste0( + "MAGIC with elements\n", + " $result : (", nrow(x$result), ", ", + ncol(x$result), ")\n", + " $operator : Python MAGIC operator\n", + " $params : list with elements (", + paste(names(x$params), collapse = ", "), ")" + ) cat(result) } @@ -409,18 +418,16 @@ print.magic <- function(x, ...) 
{ #' @param ... Arguments for summary() #' @examples #' if (pymagic_is_available()) { -#' -#' data(magic_testdata) -#' data_magic <- magic(magic_testdata) -#' summary(data_magic) -#' ## ZEB1 -#' ## Min. :0.01071 -#' ## 1st Qu.:0.01119 -#' ## Median :0.01130 -#' ## Mean :0.01129 -#' ## 3rd Qu.:0.01140 -#' ## Max. :0.01201 -#' +#' data(magic_testdata) +#' data_magic <- magic(magic_testdata) +#' summary(data_magic) +#' ## ZEB1 +#' ## Min. :0.01071 +#' ## 1st Qu.:0.01119 +#' ## Median :0.01130 +#' ## Mean :0.01129 +#' ## 3rd Qu.:0.01140 +#' ## Max. :0.01201 #' } #' @rdname summary #' @method summary magic @@ -463,12 +470,10 @@ as.data.frame.magic <- function(x, ...) { #' @param ... Arguments for ggplot() #' @examples #' if (pymagic_is_available() && require(ggplot2)) { -#' -#' data(magic_testdata) -#' data_magic <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) -#' ggplot(data_magic, aes(VIM, CDH1, colour=ZEB1)) + -#' geom_point() -#' +#' data(magic_testdata) +#' data_magic <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) +#' ggplot(data_magic, aes(VIM, CDH1, colour = ZEB1)) + +#' geom_point() #' } #' @rdname ggplot #' @method ggplot magic diff --git a/Rmagic/R/magic_testdata.R b/Rmagic/R/magic_testdata.R index 035d98d0..34cd3f45 100644 --- a/Rmagic/R/magic_testdata.R +++ b/Rmagic/R/magic_testdata.R @@ -5,4 +5,4 @@ #' @format A matrix with 500 rows and 197 variables #' #' @source The authors -"magic_testdata" \ No newline at end of file +"magic_testdata" diff --git a/Rmagic/R/preprocessing.R b/Rmagic/R/preprocessing.R index b8eb7bd2..ac64b9d5 100644 --- a/Rmagic/R/preprocessing.R +++ b/Rmagic/R/preprocessing.R @@ -12,7 +12,7 @@ #' @import Matrix #' #' @export -library.size.normalize <- function(data, verbose=FALSE) { +library.size.normalize <- function(data, verbose = FALSE) { if (verbose) { message(paste0( "Normalizing library sizes for ", diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R index 3b58de65..89136684 100644 --- a/Rmagic/R/utils.R +++ 
b/Rmagic/R/utils.R @@ -14,19 +14,37 @@ null_equal <- function(x, y) { #' @importFrom utils packageVersion #' @export check_pymagic_version <- function() { - pyversion <- strsplit(pymagic$`__version__`, '\\.')[[1]] - rversion <- strsplit(as.character(packageVersion("Rmagic")), '\\.')[[1]] + pyversion <- strsplit(pymagic$`__version__`, "\\.")[[1]] + rversion <- strsplit(as.character(packageVersion("Rmagic")), "\\.")[[1]] major_version <- as.integer(rversion[1]) minor_version <- as.integer(rversion[2]) if (as.integer(pyversion[1]) < major_version) { - warning(paste0("Python MAGIC version ", pymagic$`__version__`, " is out of date (recommended: ", - major_version, ".", minor_version, "). Please update with pip ", - "(e.g. ", reticulate::py_config()$python, " -m pip install --upgrade magic-impute) or Rmagic::install.magic().")) + warning(paste0( + "Python MAGIC version ", + pymagic$`__version__`, + " is out of date (recommended: ", + major_version, + ".", + minor_version, + "). Please update with pip ", + "(e.g. ", + reticulate::py_config()$python, + " -m pip install --upgrade magic-impute) or Rmagic::install.magic()." + )) return(FALSE) } else if (as.integer(pyversion[2]) < minor_version) { - warning(paste0("Python MAGIC version ", pymagic$`__version__`, " is out of date (recommended: ", - major_version, ".", minor_version, "). Consider updating with pip ", - "(e.g. ", reticulate::py_config()$python, " -m pip install --upgrade magic-impute) or Rmagic::install.magic().")) + warning(paste0( + "Python MAGIC version ", + pymagic$`__version__`, + " is out of date (recommended: ", + major_version, + ".", + minor_version, + "). Consider updating with pip ", + "(e.g. ", + reticulate::py_config()$python, + " -m pip install --upgrade magic-impute) or Rmagic::install.magic()." 
+ )) return(FALSE) } return(TRUE) @@ -37,9 +55,9 @@ failed_pymagic_import <- function(e) { message(e) result <- as.character(e) if (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 || - length(grep("ImportError: No module named magic", result)) > 0) { + length(grep("ImportError: No module named magic", result)) > 0) { # not installed - if (utils::menu(c("Yes", "No"), title="Install MAGIC Python package with reticulate?") == 1) { + if (utils::menu(c("Yes", "No"), title = "Install MAGIC Python package with reticulate?") == 1) { install.magic() } } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) { @@ -54,7 +72,7 @@ failed_pymagic_import <- function(e) { } load_pymagic <- function() { - delay_load = list(on_load=check_pymagic_version, on_error=failed_pymagic_import) + delay_load <- list(on_load = check_pymagic_version, on_error = failed_pymagic_import) # load if (is.null(pymagic)) { # first time load @@ -66,20 +84,21 @@ load_pymagic <- function() { } #' Check whether MAGIC Python package is available and can be loaded -#' +#' #' This is used primarily to avoid running tests on CRAN #' and elsewhere where the Python package should not be #' installed. -#' +#' #' @export pymagic_is_available <- function() { - tryCatch({ - reticulate::import("magic")$MAGIC - check_pymagic_version() - }, - error = function(e) { - FALSE - } + tryCatch( + { + reticulate::import("magic")$MAGIC + check_pymagic_version() + }, + error = function(e) { + FALSE + } ) } @@ -104,21 +123,22 @@ pymagic_is_available <- function() { #' #' @export install.magic <- function(envname = "r-reticulate", method = "auto", - conda = "auto", pip=TRUE, ...) { + conda = "auto", pip = TRUE, ...) { message("Attempting to install MAGIC python package with reticulate") - tryCatch({ - reticulate::py_install("magic-impute", - envname = envname, method = method, - conda = conda, pip=pip, ... - ) - message("Install complete. 
Please restart R and try again.") - }, - error = function(e) { - stop(paste0( - "Cannot locate MAGIC Python package, please install through pip ", - "(e.g. ", reticulate::py_config()$python, " -m pip install magic-impute) and then restart R." - )) - } + tryCatch( + { + reticulate::py_install("magic-impute", + envname = envname, method = method, + conda = conda, pip = pip, ... + ) + message("Install complete. Please restart R and try again.") + }, + error = function(e) { + stop(paste0( + "Cannot locate MAGIC Python package, please install through pip ", + "(e.g. ", reticulate::py_config()$python, " -m pip install magic-impute) and then restart R." + )) + } ) } @@ -134,7 +154,7 @@ pymagic <- NULL ###### check.int <- function(x) { - as.integer(x) + as.integer(x) } check.int.or.null <- function(x) { diff --git a/Rmagic/README.Rmd b/Rmagic/README.Rmd index bcf5f0fc..1ab80fe9 100644 --- a/Rmagic/README.Rmd +++ b/Rmagic/README.Rmd @@ -17,7 +17,7 @@ knitr::opts_chunk$set( [![Latest PyPI version](https://img.shields.io/pypi/v/magic-impute.svg)](https://pypi.org/project/magic-impute/) [![Latest CRAN version](https://img.shields.io/cran/v/Rmagic.svg)](https://cran.r-project.org/package=Rmagic) -[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/MAGIC.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/MAGIC) +[![GitHub Actions Build](https://img.shields.io/github/workflow/status/KrishnaswamyLab/MAGIC/Unit%20Tests/master?label=Github%20Actions)](https://github.com/KrishnaswamyLab/MAGIC/actions) [![Read the Docs](https://img.shields.io/readthedocs/magic.svg)](https://magic.readthedocs.io/) [![Cell Publication DOI](https://zenodo.org/badge/DOI/10.1016/j.cell.2018.05.061.svg)](https://www.cell.com/cell/abstract/S0092-8674(18)30724-4) [![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab) @@ -130,7 +130,7 @@ The example data is located in the MAGIC R package. 
```{r load_data} # load data data(magic_testdata) -magic_testdata[1:5,1:10] +magic_testdata[1:5, 1:10] ``` ### Running MAGIC @@ -139,23 +139,23 @@ Running MAGIC is as simple as running the `magic` function. ```{r run_magic} # run MAGIC -data_MAGIC <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) +data_MAGIC <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) ``` We can plot the data before and after MAGIC to visualize the results. ```{r plot_raw} ggplot(magic_testdata) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` The data suffers from dropout to the point that we cannot infer anything about the gene-gene relationships. ```{r plot_magic} ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` As you can see, the gene-gene relationships are much clearer after MAGIC. @@ -163,17 +163,17 @@ As you can see, the gene-gene relationships are much clearer after MAGIC. The data is sometimes a little too smooth - we can decrease `t` from the automatic value to reduce the amount of diffusion. We pass the original result to the argument `init` to avoid recomputing intermediate steps. ```{r plot_reduced_t} -data_MAGIC <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1"), t=6, init=data_MAGIC) +data_MAGIC <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1"), t = 6, init = data_MAGIC) ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` We can look at the entire smoothed matrix with `genes='all_genes'`, passing the original result to the argument `init` to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory. 
```{r run_magic_full_matrix} -data_MAGIC <- magic(magic_testdata, genes="all_genes", t=6, init=data_MAGIC) +data_MAGIC <- magic(magic_testdata, genes = "all_genes", t = 6, init = data_MAGIC) as.data.frame(data_MAGIC)[1:5, 1:10] ``` @@ -184,9 +184,9 @@ We can visualize the results of MAGIC on PCA as follows. ```{r run_pca} data_MAGIC_PCA <- as.data.frame(prcomp(data_MAGIC)$x) ggplot(data_MAGIC_PCA) + - geom_point(aes(x=PC1, y=PC2, color=data_MAGIC$result$VIM)) + - scale_color_viridis(option="B") + - labs(color="VIM") + geom_point(aes(x = PC1, y = PC2, color = data_MAGIC$result$VIM)) + + scale_color_viridis(option = "B") + + labs(color = "VIM") ``` @@ -195,11 +195,11 @@ ggplot(data_MAGIC_PCA) + We can visualize the results of MAGIC on PHATE as follows. We set `t` and `k` manually, because this toy dataset is really too small to make sense with PHATE; however, the default values work well for single-cell genomic data. ```{r run_phate} -data_PHATE <- phate(magic_testdata, k=3, t=15) +data_PHATE <- phate(magic_testdata, k = 3, t = 15) ggplot(data_PHATE) + - geom_point(aes(x=PHATE1, y=PHATE2, color=data_MAGIC$result$VIM)) + - scale_color_viridis(option="B") + - labs(color="VIM") + geom_point(aes(x = PHATE1, y = PHATE2, color = data_MAGIC$result$VIM)) + + scale_color_viridis(option = "B") + + labs(color = "VIM") ``` ## Issues diff --git a/Rmagic/README.md b/Rmagic/README.md index 0653025b..69fb2310 100644 --- a/Rmagic/README.md +++ b/Rmagic/README.md @@ -1,95 +1,79 @@ -Rmagic -================ - -true +--- +title : Rmagic +output: github_document +toc: true +--- -[![Latest PyPI -version](https://img.shields.io/pypi/v/magic-impute.svg)](https://pypi.org/project/magic-impute/) -[![Latest CRAN -version](https://img.shields.io/cran/v/Rmagic.svg)](https://cran.r-project.org/package=Rmagic) -[![Travis CI -Build](https://api.travis-ci.com/KrishnaswamyLab/MAGIC.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/MAGIC) -[![Read the 
-Docs](https://img.shields.io/readthedocs/magic.svg)](https://magic.readthedocs.io/) -[![Cell Publication -DOI](https://zenodo.org/badge/DOI/10.1016/j.cell.2018.05.061.svg)](https://www.cell.com/cell/abstract/S0092-8674\(18\)30724-4) + + +[![Latest PyPI version](https://img.shields.io/pypi/v/magic-impute.svg)](https://pypi.org/project/magic-impute/) +[![Latest CRAN version](https://img.shields.io/cran/v/Rmagic.svg)](https://cran.r-project.org/package=Rmagic) +[![GitHub Actions Build](https://img.shields.io/github/workflow/status/KrishnaswamyLab/MAGIC/Unit%20Tests/master?label=Github%20Actions)](https://github.com/KrishnaswamyLab/MAGIC/actions) +[![Read the Docs](https://img.shields.io/readthedocs/magic.svg)](https://magic.readthedocs.io/) +[![Cell Publication DOI](https://zenodo.org/badge/DOI/10.1016/j.cell.2018.05.061.svg)](https://www.cell.com/cell/abstract/S0092-8674(18)30724-4) [![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab) -[![Github -Stars](https://img.shields.io/github/stars/KrishnaswamyLab/MAGIC.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/MAGIC/) +[![Github Stars](https://img.shields.io/github/stars/KrishnaswamyLab/MAGIC.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/MAGIC/) -Markov Affinity-based Graph Imputation of Cells (MAGIC) is an algorithm -for denoising and imputation of single cells applied to single-cell RNA -sequencing data, as described in Van Dijk D *et al.* (2018), *Recovering -Gene Interactions from Single-Cell Data Using Data Diffusion*, Cell -. -

+Markov Affinity-based Graph Imputation of Cells (MAGIC) is an algorithm for denoising and imputation of single cells applied to single-cell RNA sequencing data, as described in Van Dijk D *et al.* (2018), *Recovering Gene Interactions from Single-Cell Data Using Data Diffusion*, Cell <https://www.cell.com/cell/abstract/S0092-8674(18)30724-4>. +

-
Magic reveals the interaction between Vimentin (VIM), Cadherin-1 -(CDH1), and Zinc finger E-box-binding homeobox 1 (ZEB1, encoded by -colors). - +
+Magic reveals the interaction between Vimentin (VIM), Cadherin-1 (CDH1), and Zinc finger E-box-binding homeobox 1 (ZEB1, encoded by colors). +

- - MAGIC imputes missing data values on sparse data sets, restoring the - structure of the data - - It also proves dimensionality reduction and gene expression - visualizations - - MAGIC can be performed on a variety of datasets - - Here, we show the usage of MAGIC on a toy dataset - - You can view further examples of MAGIC on real data in our notebooks - under - `inst/examples`: - - - - +* MAGIC imputes missing data values on sparse data sets, restoring the structure of the data +* It also provides dimensionality reduction and gene expression visualizations +* MAGIC can be performed on a variety of datasets +* Here, we show the usage of MAGIC on a toy dataset +* You can view further examples of MAGIC on real data in our notebooks under `inst/examples`: + * http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/EMT_tutorial.html + * http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/bonemarrow_tutorial.html ## Table of Contents - - [Installation](#installation) - - [Installation from CRAN and - PyPi](#installation-from-cran-and-pypi) - - [Installation with devtools and - reticulate](#installation-with-devtools-and-reticulate) - - [Installation from source](#installation-from-source) - - [Quick Start](#quick-start) - - [Tutorial](#tutorial) - - [Issues](#issues) - - [FAQ](#faq) - - [Help](#help) + * [Installation](#installation) + * [Installation from CRAN and PyPi](#installation-from-cran-and-pypi) + * [Installation with devtools and reticulate](#installation-with-devtools-and-reticulate) + * [Installation from source](#installation-from-source) + * [Quick Start](#quick-start) + * [Tutorial](#tutorial) + * [Issues](#issues) + * [FAQ](#faq) + * [Help](#help) ## Installation To use MAGIC, you will need to install both the R and Python packages. -If `python` or `pip` are not installed, you will need to install them. 
-We recommend [Miniconda3](https://conda.io/miniconda.html) to install -Python and `pip` together, or otherwise you can install `pip` from -. +If `python` or `pip` are not installed, you will need to install them. We recommend [Miniconda3](https://conda.io/miniconda.html) to install Python and `pip` together, or otherwise you can install `pip` from https://pip.pypa.io/en/stable/installing/. #### Installation from CRAN In R, run this command to install MAGIC and all dependencies: -``` r + +```r install.packages("Rmagic") ``` -In a terminal, run the following command to install the Python -repository. +In a terminal, run the following command to install the Python repository. + -``` bash +```bash pip install --user magic-impute ``` #### Installaton from source -To install the very latest version of MAGIC, you can install from GitHub -with the following commands run in a terminal. +To install the very latest version of MAGIC, you can install from GitHub with the following commands run in a terminal. + -``` bash +```bash git clone https://github.com/KrishnaswamyLab/MAGIC cd MAGIC/python python setup.py install --user @@ -99,10 +83,10 @@ R CMD INSTALL . ## Quick Start -If you have loaded a data matrix `data` in R (cells on rows, genes on -columns) you can run PHATE as follows: +If you have loaded a data matrix `data` in R (cells on rows, genes on columns) you can run PHATE as follows: -``` r + +```r library(phateR) data_phate <- phate(data) ``` @@ -111,18 +95,19 @@ data_phate <- phate(data) #### Extra packages for the tutorial -We’ll install a couple more tools for this tutorial. +We'll install a couple more tools for this tutorial. 
+ -``` r +```r if (!require(viridis)) install.packages("viridis") if (!require(ggplot2)) install.packages("ggplot2") if (!require(phateR)) install.packages("phateR") ``` -If you have never used PHATE, you should also install PHATE from the -command line as follows: +If you have never used PHATE, you should also install PHATE from the command line as follows: + -``` bash +```bash pip install --user phate ``` @@ -130,18 +115,18 @@ pip install --user phate We load the Rmagic package and a few others for convenience functions. -``` r + +```r library(Rmagic) #> Loading required package: Matrix library(ggplot2) -#> Warning: package 'ggplot2' was built under R version 3.5.3 library(viridis) #> Loading required package: viridisLite library(phateR) -#> +#> #> Attaching package: 'phateR' #> The following object is masked from 'package:Rmagic': -#> +#> #> library.size.normalize ``` @@ -149,125 +134,123 @@ library(phateR) The example data is located in the MAGIC R package. -``` r + +```r # load data data(magic_testdata) -magic_testdata[1:5,1:10] -#> A1BG-AS1 AAMDC AAMP AARSD1 ABCA12 ABCG2 ABHD13 -#> 6564 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 -#> 3835 0.0000000 0.8714711 0.0000000 0 0 0 0.8714711 -#> 6318 0.7739207 0.0000000 0.7739207 0 0 0 0.0000000 -#> 3284 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 -#> 1171 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 -#> AC007773.2 AC011998.4 AC013470.6 -#> 6564 0 0 0 -#> 3835 0 0 0 -#> 6318 0 0 0 -#> 3284 0 0 0 -#> 1171 0 0 0 +magic_testdata[1:5, 1:10] +#> A1BG-AS1 AAMDC AAMP AARSD1 ABCA12 ABCG2 ABHD13 AC007773.2 +#> 6564 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 0 +#> 3835 0.0000000 0.8714711 0.0000000 0 0 0 0.8714711 0 +#> 6318 0.7739207 0.0000000 0.7739207 0 0 0 0.0000000 0 +#> 3284 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 0 +#> 1171 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 0 +#> AC011998.4 AC013470.6 +#> 6564 0 0 +#> 3835 0 0 +#> 6318 0 0 +#> 3284 0 0 +#> 1171 0 0 ``` ### Running MAGIC Running MAGIC is as 
simple as running the `magic` function. -``` r + +```r # run MAGIC -data_MAGIC <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) +data_MAGIC <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) ``` We can plot the data before and after MAGIC to visualize the results. -``` r + +```r ggplot(magic_testdata) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` - +plot of chunk plot_raw + +The data suffers from dropout to the point that we cannot infer anything about the gene-gene relationships. -The data suffers from dropout to the point that we cannot infer anything -about the gene-gene relationships. -``` r +```r ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` - +plot of chunk plot_magic + +As you can see, the gene-gene relationships are much clearer after MAGIC. -As you can see, the gene-gene relationships are much clearer after -MAGIC. +The data is sometimes a little too smooth - we can decrease `t` from the automatic value to reduce the amount of diffusion. We pass the original result to the argument `init` to avoid recomputing intermediate steps. -The data is sometimes a little too smooth - we can decrease `t` from the -automatic value to reduce the amount of diffusion. We pass the original -result to the argument `init` to avoid recomputing intermediate -steps. 
-``` r -data_MAGIC <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1"), t=6, init=data_MAGIC) +```r +data_MAGIC <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1"), t = 6, init = data_MAGIC) ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") ``` - +plot of chunk plot_reduced_t -We can look at the entire smoothed matrix with `genes='all_genes'`, -passing the original result to the argument `init` to avoid recomputing -intermediate steps. Note that this matrix may be large and could take up -a lot of -memory. -``` r -data_MAGIC <- magic(magic_testdata, genes="all_genes", t=6, init=data_MAGIC) +We can look at the entire smoothed matrix with `genes='all_genes'`, passing the original result to the argument `init` to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory. + + +```r +data_MAGIC <- magic(magic_testdata, genes = "all_genes", t = 6, init = data_MAGIC) as.data.frame(data_MAGIC)[1:5, 1:10] #> A1BG-AS1 AAMDC AAMP AARSD1 ABCA12 ABCG2 -#> 6564 0.02565716 0.06303703 0.1726791 0.01559474 0.03114244 0.01423031 -#> 3835 0.02535551 0.06286382 0.1678011 0.01547390 0.03017628 0.01428737 -#> 6318 0.02619089 0.06298015 0.1744098 0.01514747 0.03145176 0.01477152 -#> 3284 0.02517645 0.06254417 0.1684572 0.01559623 0.03015758 0.01414733 -#> 1171 0.02651602 0.06289360 0.1729842 0.01514780 0.03162480 0.01480426 -#> ABHD13 AC007773.2 AC011998.4 AC013470.6 -#> 6564 0.07100262 0.001129400 0.001880153 0.003215547 -#> 3835 0.06989726 0.001086716 0.001847604 0.002833342 -#> 6318 0.07165035 0.001203505 0.002044504 0.003550067 -#> 3284 0.07066602 0.001039065 0.001723499 0.002822357 -#> 1171 0.07094679 0.001236082 0.002133401 0.003450875 +#> 6564 0.03332336 0.06672377 0.1718769 0.01765440 0.03641116 0.01703004 +#> 3835 0.03142519 0.06720022 0.1568662 0.01619578 0.03338187 0.01729001 +#> 6318 
0.03519781 0.06551774 0.1811869 0.01462556 0.03595934 0.02094741 +#> 3284 0.03130388 0.06374405 0.1621586 0.01686944 0.03288072 0.01786413 +#> 1171 0.03515109 0.06447265 0.1735847 0.01444976 0.03791399 0.01995593 +#> ABHD13 AC007773.2 AC011998.4 AC013470.6 +#> 6564 0.07692547 0.0007960324 0.001382103 0.002978190 +#> 3835 0.07578407 0.0007146892 0.001206586 0.002613474 +#> 6318 0.08120989 0.0011273292 0.001594218 0.005743911 +#> 3284 0.07568180 0.0007009115 0.001017284 0.002982551 +#> 1171 0.07975672 0.0010427596 0.001982926 0.005315534 ``` ### Visualizing MAGIC values on PCA We can visualize the results of MAGIC on PCA as follows. -``` r + +```r data_MAGIC_PCA <- as.data.frame(prcomp(data_MAGIC)$x) ggplot(data_MAGIC_PCA) + - geom_point(aes(x=PC1, y=PC2, color=data_MAGIC$result$VIM)) + - scale_color_viridis(option="B") + - labs(color="VIM") + geom_point(aes(x = PC1, y = PC2, color = data_MAGIC$result$VIM)) + + scale_color_viridis(option = "B") + + labs(color = "VIM") ``` - +plot of chunk run_pca + ### Visualizing MAGIC values on PHATE -We can visualize the results of MAGIC on PHATE as follows. We set `t` -and `k` manually, because this toy dataset is really too small to make -sense with PHATE; however, the default values work well for single-cell -genomic data. +We can visualize the results of MAGIC on PHATE as follows. We set `t` and `k` manually, because this toy dataset is really too small to make sense with PHATE; however, the default values work well for single-cell genomic data. + -``` r -data_PHATE <- phate(magic_testdata, k=3, t=15) +```r +data_PHATE <- phate(magic_testdata, k = 3, t = 15) #> Argument k is deprecated. Using knn instead. 
ggplot(data_PHATE) + - geom_point(aes(x=PHATE1, y=PHATE2, color=data_MAGIC$result$VIM)) + - scale_color_viridis(option="B") + - labs(color="VIM") + geom_point(aes(x = PHATE1, y = PHATE2, color = data_MAGIC$result$VIM)) + + scale_color_viridis(option = "B") + + labs(color = "VIM") ``` - +plot of chunk run_phate ## Issues @@ -281,7 +264,7 @@ To be consistent with common functions such as PCA data. - **I have installed MAGIC in Python, but Rmagic says it is not - installed\!** + installed!** Check your `reticulate::py_discover_config("magic")` and compare it to the version of Python in which you installed PHATE (run `which python` @@ -296,8 +279,7 @@ You can read more about `Renviron` at ### Help -Please let us know of any issues at the [GitHub -repository](https://github.com/KrishnaswamyLab/MAGIC/issues). If you +Please let us know of any issues at the [GitHub repository](https://github.com/KrishnaswamyLab/MAGIC/issues). If you have any questions or require assistance using MAGIC, please read the documentation by running `help(Rmagic::magic)` or contact us at . 
diff --git a/Rmagic/data-raw/generate_test_data.R b/Rmagic/data-raw/generate_test_data.R index f0e86ab9..fb8d090e 100644 --- a/Rmagic/data-raw/generate_test_data.R +++ b/Rmagic/data-raw/generate_test_data.R @@ -3,13 +3,15 @@ magic_testdata <- read_csv("../../data/HMLE_TGFb_day_8_10.csv.gz") set.seed(42) keep_cols <- colSums(magic_testdata > 0) > 10 keep_rows <- rowSums(magic_testdata) > 2000 -magic_testdata <- magic_testdata[keep_rows,keep_cols] +magic_testdata <- magic_testdata[keep_rows, keep_cols] magic_testdata <- Rmagic::library.size.normalize(magic_testdata) magic_testdata <- sqrt(magic_testdata) -select_cols <- c(colnames(magic_testdata)[ceiling(runif(200) * nrow(magic_testdata))], - c("VIM", "CDH1", "ZEB1")) -magic_testdata <- magic_testdata[,colnames(magic_testdata) %in% select_cols] +select_cols <- c( + colnames(magic_testdata)[ceiling(runif(200) * nrow(magic_testdata))], + c("VIM", "CDH1", "ZEB1") +) +magic_testdata <- magic_testdata[, colnames(magic_testdata) %in% select_cols] select_rows <- ceiling(runif(500) * nrow(magic_testdata)) -magic_testdata <- magic_testdata[select_rows,] +magic_testdata <- magic_testdata[select_rows, ] write_csv(magic_testdata, "../../data/test_data.csv") usethis::use_data(magic_testdata) diff --git a/Rmagic/inst/CITATION b/Rmagic/inst/CITATION index 3b2b0e13..f8a9b12e 100644 --- a/Rmagic/inst/CITATION +++ b/Rmagic/inst/CITATION @@ -2,20 +2,20 @@ bibentry( bibtype="Article", title="Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", author = c( - person("David", "van Dijk"), - person("Roshan", "Sharma"), - person("Juozas", "Nainys"), - person("Kristina", "Yim"), - person("Pooja", "Kathail"), - person("Ambrose J.", "Carr"), - person("Cassandra", "Burdziak"), - person("Kevin R.", "Moon"), - person("Christine L.", "Chaffer"), - person("Diwakar", "Pattabiraman"), - person("Brian", "Bierie"), - person("Linas", "Mazutis"), - person("Guy", "Wolf"), - person("Smita", "Krishnaswamy"), + person("David", "van 
Dijk"), + person("Roshan", "Sharma"), + person("Juozas", "Nainys"), + person("Kristina", "Yim"), + person("Pooja", "Kathail"), + person("Ambrose J.", "Carr"), + person("Cassandra", "Burdziak"), + person("Kevin R.", "Moon"), + person("Christine L.", "Chaffer"), + person("Diwakar", "Pattabiraman"), + person("Brian", "Bierie"), + person("Linas", "Mazutis"), + person("Guy", "Wolf"), + person("Smita", "Krishnaswamy"), person("Dana", "Pe'er")), year=2018, url="https://www.cell.com/cell/abstract/S0092-8674(18)30724-4", diff --git a/Rmagic/inst/examples/bonemarrow_tutorial.Rmd b/Rmagic/inst/examples/bonemarrow_tutorial.Rmd index 303ae731..779b9286 100644 --- a/Rmagic/inst/examples/bonemarrow_tutorial.Rmd +++ b/Rmagic/inst/examples/bonemarrow_tutorial.Rmd @@ -60,8 +60,8 @@ In this tutorial, we will analyse myeloid and erythroid cells in mouse bone marr ```{r load_data} # load data bmmsc <- read_csv("https://github.com/KrishnaswamyLab/PHATE/raw/master/data/BMMC_myeloid.csv.gz") -bmmsc <- bmmsc[,2:ncol(bmmsc)] -bmmsc[1:5,1:10] +bmmsc <- bmmsc[, 2:ncol(bmmsc)] +bmmsc[1:5, 1:10] ``` ### Filtering data @@ -71,17 +71,17 @@ First, we need to remove lowly expressed genes and cells with small library size ```{r} # keep genes expressed in at least 10 cells keep_cols <- colSums(bmmsc > 0) > 10 -bmmsc <- bmmsc[,keep_cols] +bmmsc <- bmmsc[, keep_cols] # look at the distribution of library sizes ggplot() + - geom_histogram(aes(x=rowSums(bmmsc)), bins=50) + - geom_vline(xintercept = 1000, color='red') + geom_histogram(aes(x = rowSums(bmmsc)), bins = 50) + + geom_vline(xintercept = 1000, color = "red") ``` ```{r} # keep cells with at least 1000 UMIs keep_rows <- rowSums(bmmsc) > 1000 -bmmsc <- bmmsc[keep_rows,] +bmmsc <- bmmsc[keep_rows, ] ``` ### Normalizing data @@ -99,24 +99,24 @@ Running MAGIC is as simple as running the `magic` function. 
```{r run_magic} # run MAGIC -bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1")) +bmmsc_MAGIC <- magic(bmmsc, genes = c("Mpo", "Klf1", "Ifitm1")) ``` We can plot the data before and after MAGIC to visualize the results. ```{r plot_raw} ggplot(bmmsc) + - geom_point(aes(Mpo, Klf1, color=Ifitm1)) + - scale_color_viridis(option="B") -ggsave('BMMSC_data_R_before_magic.png', width=5, height=5) + geom_point(aes(Mpo, Klf1, color = Ifitm1)) + + scale_color_viridis(option = "B") +ggsave("BMMSC_data_R_before_magic.png", width = 5, height = 5) ``` The data suffers from dropout to the point that we cannot infer anything about the gene-gene relationships. ```{r plot_magic} ggplot(bmmsc_MAGIC) + - geom_point(aes(Mpo, Klf1, color=Ifitm1)) + - scale_color_viridis(option="B") + geom_point(aes(Mpo, Klf1, color = Ifitm1)) + + scale_color_viridis(option = "B") ``` As you can see, the gene-gene relationships are much clearer after MAGIC. These relationships also match the biological progression we expect to see - Ifitm1 is a stem cell marker, Klf1 is an erythroid marker, and Mpo is a myeloid marker. @@ -126,12 +126,14 @@ As you can see, the gene-gene relationships are much clearer after MAGIC. These The data is a little too smooth - we can increase `t` from the default value of 3 to increase the amount of diffusion. We pass the original result to the argument `init` to avoid recomputing intermediate steps. 
```{r decrease_t} -bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1"), - t=4, init=bmmsc_MAGIC) +bmmsc_MAGIC <- magic(bmmsc, + genes = c("Mpo", "Klf1", "Ifitm1"), + t = 4, init = bmmsc_MAGIC +) ggplot(bmmsc_MAGIC) + - geom_point(aes(Mpo, Klf1, color=Ifitm1)) + - scale_color_viridis(option="B") -ggsave('BMMSC_data_R_after_magic.png', width=5, height=5) + geom_point(aes(Mpo, Klf1, color = Ifitm1)) + + scale_color_viridis(option = "B") +ggsave("BMMSC_data_R_after_magic.png", width = 5, height = 5) ``` ### Visualizing MAGIC values on PCA @@ -139,13 +141,15 @@ ggsave('BMMSC_data_R_after_magic.png', width=5, height=5) We can visualize the results of MAGIC on PCA with `genes="pca_only"`. ```{r run_pca} -bmmsc_MAGIC_PCA <- magic(bmmsc, genes="pca_only", - t=4, init=bmmsc_MAGIC) +bmmsc_MAGIC_PCA <- magic(bmmsc, + genes = "pca_only", + t = 4, init = bmmsc_MAGIC +) # ggplot(bmmsc_MAGIC_PCA) + - geom_point(aes(x=PC1, y=PC2, color=bmmsc_MAGIC$result$Klf1)) + - scale_color_viridis(option="B") + - labs(color="Klf1") -ggsave('BMMSC_data_R_pca_colored_by_magic.png', width=5, height=5) +geom_point(aes(x = PC1, y = PC2, color = bmmsc_MAGIC$result$Klf1)) + + scale_color_viridis(option = "B") + + labs(color = "Klf1") +ggsave("BMMSC_data_R_pca_colored_by_magic.png", width = 5, height = 5) ``` @@ -156,10 +160,10 @@ We can visualize the results of MAGIC on PHATE as follows. 
```{r run_phate} bmmsc_PHATE <- phate(bmmsc) ggplot(bmmsc_PHATE) + - geom_point(aes(x=PHATE1, y=PHATE2, color=bmmsc_MAGIC$result$Klf1)) + - scale_color_viridis(option="B") + - labs(color="Klf1") -ggsave('BMMSC_data_R_phate_colored_by_magic.png', width=5, height=5) + geom_point(aes(x = PHATE1, y = PHATE2, color = bmmsc_MAGIC$result$Klf1)) + + scale_color_viridis(option = "B") + + labs(color = "Klf1") +ggsave("BMMSC_data_R_phate_colored_by_magic.png", width = 5, height = 5) ``` ### Using MAGIC for downstream analysis @@ -167,8 +171,10 @@ ggsave('BMMSC_data_R_phate_colored_by_magic.png', width=5, height=5) We can look at the entire smoothed matrix with `genes='all_genes'`, passing the original result to the argument `init` to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory. ```{r run_magic_full_matrix} -bmmsc_MAGIC <- magic(bmmsc, genes="all_genes", - t=4, init=bmmsc_MAGIC) +bmmsc_MAGIC <- magic(bmmsc, + genes = "all_genes", + t = 4, init = bmmsc_MAGIC +) as.data.frame(bmmsc_MAGIC)[1:5, 1:10] ``` diff --git a/Rmagic/inst/examples/bonemarrow_tutorial.html b/Rmagic/inst/examples/bonemarrow_tutorial.html index 701dc4aa..292d1c5e 100644 --- a/Rmagic/inst/examples/bonemarrow_tutorial.html +++ b/Rmagic/inst/examples/bonemarrow_tutorial.html @@ -1630,10 +1630,10 @@

Loading packages

library(viridis)
## Loading required package: viridisLite
library(phateR)
-
## 
+
##
 ## Attaching package: 'phateR'
## The following object is masked from 'package:Rmagic':
-## 
+##
 ##     library.size.normalize
@@ -1698,7 +1698,7 @@

Running MAGIC

Rerunning MAGIC with new parameters

The data is a little too smooth - we can decrease t from the automatic value to reduce the amount of diffusion. We pass the original result to the argument init to avoid recomputing intermediate steps.

-
bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1"), 
+
bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1"),
                      t=4, init=bmmsc_MAGIC)
 ggplot(bmmsc_MAGIC) +
   geom_point(aes(Mpo, Klf1, color=Ifitm1)) +
@@ -1709,7 +1709,7 @@ 

Rerunning MAGIC with new parameters

Visualizing MAGIC values on PCA

We can visualize the results of MAGIC on PCA with genes="pca_only".

-
bmmsc_MAGIC_PCA <- magic(bmmsc, genes="pca_only", 
+
bmmsc_MAGIC_PCA <- magic(bmmsc, genes="pca_only",
                          t=4, init=bmmsc_MAGIC)
 ggplot(bmmsc_MAGIC_PCA) +
   geom_point(aes(x=PC1, y=PC2, color=bmmsc_MAGIC$result$Klf1)) +
@@ -1732,7 +1732,7 @@ 

Visualizing MAGIC values on PHATE

Using MAGIC for downstream analysis

We can look at the entire smoothed matrix with genes='all_genes', passing the original result to the argument init to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory.

-
bmmsc_MAGIC <- magic(bmmsc, genes="all_genes", 
+
bmmsc_MAGIC <- magic(bmmsc, genes="all_genes",
                      t=4, init=bmmsc_MAGIC)
 as.data.frame(bmmsc_MAGIC)[1:5, 1:10]
diff --git a/Rmagic/inst/examples/emt_tutorial.Rmd b/Rmagic/inst/examples/emt_tutorial.Rmd index c3c898d9..6ed284cf 100644 --- a/Rmagic/inst/examples/emt_tutorial.Rmd +++ b/Rmagic/inst/examples/emt_tutorial.Rmd @@ -60,7 +60,7 @@ In this tutorial, we will analyze single-cell RNA sequencing of the epithelial t ```{r load_data} # load data data <- read_csv("../../../data/HMLE_TGFb_day_8_10.csv.gz") -data[1:5,1:10] +data[1:5, 1:10] ``` ### Filtering data @@ -70,7 +70,7 @@ First, we need to remove lowly expressed genes. ```{r remove_rare_genes} # keep genes expressed in at least 10 cells keep_cols <- colSums(data > 0) > 10 -data <- data[,keep_cols] +data <- data[, keep_cols] ``` Ordinarily, we would remove cells with small library sizes. In this dataset, it has already been done; however, if you wanted to do that, you could do it with the code below. @@ -78,15 +78,15 @@ Ordinarily, we would remove cells with small library sizes. In this dataset, it ```{r libsize_histogram} # look at the distribution of library sizes ggplot() + - geom_histogram(aes(x=rowSums(data)), bins=50) + - geom_vline(xintercept = 1000, color='red') + geom_histogram(aes(x = rowSums(data)), bins = 50) + + geom_vline(xintercept = 1000, color = "red") ``` ```{r filter_libsize} if (FALSE) { # keep cells with at least 1000 UMIs and at most 15000 keep_rows <- rowSums(data) > 1000 & rowSums(data) < 15000 - data <- data[keep_rows,] + data <- data[keep_rows, ] } ``` @@ -107,39 +107,41 @@ Running MAGIC is as simple as running the `magic` function. Because this dataset ```{r run_magic} # run MAGIC -data_MAGIC <- magic(data, knn=3, genes=c("VIM", "CDH1", "ZEB1")) +data_MAGIC <- magic(data, knn = 3, genes = c("VIM", "CDH1", "ZEB1")) ``` We can plot the data before and after MAGIC to visualize the results. 
```{r plot_raw} ggplot(data) + - geom_point(aes(VIM, CDH1, color=ZEB1)) + - scale_color_viridis(option="B") -ggsave('EMT_data_R_before_magic.png', width=5, height=5) + geom_point(aes(VIM, CDH1, color = ZEB1)) + + scale_color_viridis(option = "B") +ggsave("EMT_data_R_before_magic.png", width = 5, height = 5) ``` ```{r plot_magic} ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, color=ZEB1)) + - scale_color_viridis(option="B") -ggsave('EMT_data_R_after_magic.png', width=5, height=5) + geom_point(aes(VIM, CDH1, color = ZEB1)) + + scale_color_viridis(option = "B") +ggsave("EMT_data_R_after_magic.png", width = 5, height = 5) ``` -As you can see, the gene-gene relationships are much clearer after MAGIC. +As you can see, the gene-gene relationships are much clearer after MAGIC. ### Visualizing MAGIC values on PCA We can visualize the results of MAGIC on PCA with `genes="pca_only"`. ```{r run_pca} -data_MAGIC_PCA <- magic(data, genes="pca_only", - knn=15, init=data_MAGIC) +data_MAGIC_PCA <- magic(data, + genes = "pca_only", + knn = 15, init = data_MAGIC +) ggplot(data_MAGIC_PCA) + - geom_point(aes(x=PC1, y=PC2, color=data_MAGIC$result$VIM)) + - scale_color_viridis(option="B") + - labs(color="VIM") -ggsave('EMT_data_R_pca_colored_by_magic.png', width=5, height=5) + geom_point(aes(x = PC1, y = PC2, color = data_MAGIC$result$VIM)) + + scale_color_viridis(option = "B") + + labs(color = "VIM") +ggsave("EMT_data_R_pca_colored_by_magic.png", width = 5, height = 5) ``` ### Using MAGIC for downstream analysis @@ -147,8 +149,10 @@ ggsave('EMT_data_R_pca_colored_by_magic.png', width=5, height=5) We can look at the entire smoothed matrix with `genes='all_genes'`, passing the original result to the argument `init` to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory. 
```{r run_magic_full_matrix} -data_MAGIC <- magic(data, genes="all_genes", - knn=15, init=data_MAGIC) +data_MAGIC <- magic(data, + genes = "all_genes", + knn = 15, init = data_MAGIC +) as.data.frame(data_MAGIC)[1:5, 1:10] ``` diff --git a/Rmagic/inst/examples/emt_tutorial.html b/Rmagic/inst/examples/emt_tutorial.html index 51a13431..8b66b1f8 100644 --- a/Rmagic/inst/examples/emt_tutorial.html +++ b/Rmagic/inst/examples/emt_tutorial.html @@ -1695,10 +1695,10 @@

Loading packages

library(viridis)
## Loading required package: viridisLite
library(phateR)
-
## 
+
##
 ## Attaching package: 'phateR'
## The following object is masked from 'package:Rmagic':
-## 
+##
 ##     library.size.normalize
@@ -1765,7 +1765,7 @@

Running MAGIC

Visualizing MAGIC values on PCA

We can visualize the results of MAGIC on PCA with genes="pca_only".

-
data_MAGIC_PCA <- magic(data, genes="pca_only", 
+
data_MAGIC_PCA <- magic(data, genes="pca_only",
                         knn=15, init=data_MAGIC)
 ggplot(data_MAGIC_PCA) +
   geom_point(aes(x=PC1, y=PC2, color=data_MAGIC$result$VIM)) +
@@ -1777,7 +1777,7 @@ 

Visualizing MAGIC values on PCA

Using MAGIC for downstream analysis

We can look at the entire smoothed matrix with genes='all_genes', passing the original result to the argument init to avoid recomputing intermediate steps. Note that this matrix may be large and could take up a lot of memory.

-
data_MAGIC <- magic(data, genes="all_genes", 
+
data_MAGIC <- magic(data, genes="all_genes",
                     knn=15, init=data_MAGIC)
 as.data.frame(data_MAGIC)[1:5, 1:10]
diff --git a/Rmagic/man/figures/README-plot_magic-1.png b/Rmagic/man/figures/README-plot_magic-1.png index 6a44fa4f..2dbe0563 100644 Binary files a/Rmagic/man/figures/README-plot_magic-1.png and b/Rmagic/man/figures/README-plot_magic-1.png differ diff --git a/Rmagic/man/figures/README-plot_raw-1.png b/Rmagic/man/figures/README-plot_raw-1.png index 71ee5439..8cdd8e9c 100644 Binary files a/Rmagic/man/figures/README-plot_raw-1.png and b/Rmagic/man/figures/README-plot_raw-1.png differ diff --git a/Rmagic/man/figures/README-plot_reduced_t-1.png b/Rmagic/man/figures/README-plot_reduced_t-1.png index 9153f679..06281dc6 100644 Binary files a/Rmagic/man/figures/README-plot_reduced_t-1.png and b/Rmagic/man/figures/README-plot_reduced_t-1.png differ diff --git a/Rmagic/man/figures/README-run_pca-1.png b/Rmagic/man/figures/README-run_pca-1.png index f7f1fb49..43693bd9 100644 Binary files a/Rmagic/man/figures/README-run_pca-1.png and b/Rmagic/man/figures/README-run_pca-1.png differ diff --git a/Rmagic/man/figures/README-run_phate-1.png b/Rmagic/man/figures/README-run_phate-1.png index 0ebf4e1a..cbd40fb0 100644 Binary files a/Rmagic/man/figures/README-run_phate-1.png and b/Rmagic/man/figures/README-run_phate-1.png differ diff --git a/Rmagic/man/ggplot.Rd b/Rmagic/man/ggplot.Rd index a6a9d6d7..fd0f87b0 100644 --- a/Rmagic/man/ggplot.Rd +++ b/Rmagic/man/ggplot.Rd @@ -16,11 +16,9 @@ Passes the smoothed data matrix to ggplot } \examples{ if (pymagic_is_available() && require(ggplot2)) { - -data(magic_testdata) -data_magic <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) -ggplot(data_magic, aes(VIM, CDH1, colour=ZEB1)) + - geom_point() - + data(magic_testdata) + data_magic <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) + ggplot(data_magic, aes(VIM, CDH1, colour = ZEB1)) + + geom_point() } } diff --git a/Rmagic/man/magic.Rd b/Rmagic/man/magic.Rd index 5468e57e..360e98b4 100644 --- a/Rmagic/man/magic.Rd +++ b/Rmagic/man/magic.Rd @@ -142,50 +142,45 @@ van Dijk et al, 
2018. } \examples{ if (pymagic_is_available()) { - -data(magic_testdata) - -# Run MAGIC -data_magic <- magic(magic_testdata, genes=c("VIM", "CDH1", "ZEB1")) -summary(data_magic) -## CDH1 VIM ZEB1 -## Min. :0.4303 Min. :3.854 Min. :0.01111 -## 1st Qu.:0.4444 1st Qu.:3.947 1st Qu.:0.01145 -## Median :0.4462 Median :3.964 Median :0.01153 -## Mean :0.4461 Mean :3.965 Mean :0.01152 -## 3rd Qu.:0.4478 3rd Qu.:3.982 3rd Qu.:0.01160 -## Max. :0.4585 Max. :4.127 Max. :0.01201 - -# Plot the result with ggplot2 -if (require(ggplot2)) { - ggplot(data_magic) + - geom_point(aes(x=VIM, y=CDH1, color=ZEB1)) -} - -# Run MAGIC again returning all genes -# We use the last run as initialization -data_magic <- magic(magic_testdata, genes="all_genes", init=data_magic) -# Extract the smoothed data matrix to use in downstream analysis -data_smooth <- as.matrix(data_magic) - + data(magic_testdata) + + # Run MAGIC + data_magic <- magic(magic_testdata, genes = c("VIM", "CDH1", "ZEB1")) + summary(data_magic) + ## CDH1 VIM ZEB1 + ## Min. :0.4303 Min. :3.854 Min. :0.01111 + ## 1st Qu.:0.4444 1st Qu.:3.947 1st Qu.:0.01145 + ## Median :0.4462 Median :3.964 Median :0.01153 + ## Mean :0.4461 Mean :3.965 Mean :0.01152 + ## 3rd Qu.:0.4478 3rd Qu.:3.982 3rd Qu.:0.01160 + ## Max. :0.4585 Max. :4.127 Max. 
:0.01201 + + # Plot the result with ggplot2 + if (require(ggplot2)) { + ggplot(data_magic) + + geom_point(aes(x = VIM, y = CDH1, color = ZEB1)) + } + + # Run MAGIC again returning all genes + # We use the last run as initialization + data_magic <- magic(magic_testdata, genes = "all_genes", init = data_magic) + # Extract the smoothed data matrix to use in downstream analysis + data_smooth <- as.matrix(data_magic) } if (pymagic_is_available() && require(Seurat)) { + data(magic_testdata) -data(magic_testdata) + # Create a Seurat object + seurat_object <- CreateSeuratObject(counts = t(magic_testdata), assay = "RNA") + seurat_object <- NormalizeData(object = seurat_object) + seurat_object <- ScaleData(object = seurat_object) -# Create a Seurat object -seurat_object <- CreateSeuratObject(counts = t(magic_testdata), assay="RNA") -seurat_object <- NormalizeData(object = seurat_object) -seurat_object <- ScaleData(object = seurat_object) - -# Run MAGIC and reset the active assay -seurat_object <- magic(seurat_object) -seurat_object@active.assay = "MAGIC_RNA" - -# Analyze with Seurat -VlnPlot(seurat_object, features=c("VIM", "ZEB1", "CDH1")) + # Run MAGIC and reset the active assay + seurat_object <- magic(seurat_object) + seurat_object@active.assay <- "MAGIC_RNA" + # Analyze with Seurat + VlnPlot(seurat_object, features = c("VIM", "ZEB1", "CDH1")) } - } diff --git a/Rmagic/man/magic_testdata.Rd b/Rmagic/man/magic_testdata.Rd index 1237bae2..8e384dc1 100644 --- a/Rmagic/man/magic_testdata.Rd +++ b/Rmagic/man/magic_testdata.Rd @@ -4,7 +4,9 @@ \name{magic_testdata} \alias{magic_testdata} \title{Fake scRNAseq data for examples} -\format{A matrix with 500 rows and 197 variables} +\format{ +A matrix with 500 rows and 197 variables +} \source{ The authors } diff --git a/Rmagic/man/print.Rd b/Rmagic/man/print.Rd index 5050af74..fb5d4be3 100644 --- a/Rmagic/man/print.Rd +++ b/Rmagic/man/print.Rd @@ -16,14 +16,12 @@ This avoids spamming the user's console with a list of many large 
matrices } \examples{ if (pymagic_is_available()) { - -data(magic_testdata) -data_magic <- magic(magic_testdata) -print(data_magic) -## MAGIC with elements -## $result : (500, 197) -## $operator : Python MAGIC operator -## $params : list with elements (data, knn, decay, t, npca, knn.dist.method) - + data(magic_testdata) + data_magic <- magic(magic_testdata) + print(data_magic) + ## MAGIC with elements + ## $result : (500, 197) + ## $operator : Python MAGIC operator + ## $params : list with elements (data, knn, decay, t, npca, knn.dist.method) } } diff --git a/Rmagic/man/summary.Rd b/Rmagic/man/summary.Rd index daf446c4..6f5c85e5 100644 --- a/Rmagic/man/summary.Rd +++ b/Rmagic/man/summary.Rd @@ -16,17 +16,15 @@ Summarize a MAGIC object } \examples{ if (pymagic_is_available()) { - -data(magic_testdata) -data_magic <- magic(magic_testdata) -summary(data_magic) -## ZEB1 -## Min. :0.01071 -## 1st Qu.:0.01119 -## Median :0.01130 -## Mean :0.01129 -## 3rd Qu.:0.01140 -## Max. :0.01201 - + data(magic_testdata) + data_magic <- magic(magic_testdata) + summary(data_magic) + ## ZEB1 + ## Min. :0.01071 + ## 1st Qu.:0.01119 + ## Median :0.01130 + ## Mean :0.01129 + ## 3rd Qu.:0.01140 + ## Max. 
:0.01201 } } diff --git a/Rmagic/tests/test_magic.R b/Rmagic/tests/test_magic.R index 13c949d9..2cb5f174 100644 --- a/Rmagic/tests/test_magic.R +++ b/Rmagic/tests/test_magic.R @@ -8,12 +8,12 @@ library(viridis) seurat_obj <- function() { # load data - data <- read.csv('../../data/HMLE_TGFb_day_8_10.csv.gz') - + data <- read.csv("../../data/HMLE_TGFb_day_8_10.csv.gz") + seurat_raw_data <- t(data) rownames(seurat_raw_data) <- colnames(data) colnames(seurat_raw_data) <- rownames(data) - seurat_obj <- Seurat::CreateSeuratObject(raw.data=seurat_raw_data) + seurat_obj <- Seurat::CreateSeuratObject(raw.data = seurat_raw_data) # run MAGIC data_MAGIC <- magic(data) @@ -22,12 +22,12 @@ seurat_obj <- function() { # plot p <- ggplot(data) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") - ggsave('EMT_data_R_before_magic.png', plot=p, width=5, height=5) + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") + ggsave("EMT_data_R_before_magic.png", plot = p, width = 5, height = 5) p_m <- ggplot(data_MAGIC) + - geom_point(aes(VIM, CDH1, colour=ZEB1)) + - scale_colour_viridis(option="B") - ggsave('EMT_data_R_after_magic.png', plot=p_m, width=5, height=5) + geom_point(aes(VIM, CDH1, colour = ZEB1)) + + scale_colour_viridis(option = "B") + ggsave("EMT_data_R_after_magic.png", plot = p_m, width = 5, height = 5) } diff --git a/autoblack.sh b/autoblack.sh deleted file mode 100644 index 981a7b4f..00000000 --- a/autoblack.sh +++ /dev/null @@ -1,14 +0,0 @@ -cat <> .git/hooks/pre-commit -#!/bin/sh - -set -e - -files=\$(git diff --staged --name-only --diff-filter=d -- "*.py") - -for file in \$files; do - black -q -t py35 \$file - git add \$file -done -EOF -chmod +x .git/hooks/pre-commit - diff --git a/matlab/compute_kernel.m b/matlab/compute_kernel.m index e7dadd04..28c0728f 100644 --- a/matlab/compute_kernel.m +++ b/matlab/compute_kernel.m @@ -1,7 +1,7 @@ function K = compute_alpha_kernel_sparse(data, varargin) % K = 
computer_alpha_kernel_sparse(data, varargin) % Computes sparse alpha-decay kernel -% varargin: +% varargin: % 'npca' (default = [], no PCA) % Perform fast random PCA before computing distances % 'k' (default = 5) @@ -69,7 +69,7 @@ idx_thresh=find(below_thresh); -if ~isempty(idx_thresh) +if ~isempty(idx_thresh) K=exp(-(kdist(idx_thresh,:)./epsilon(idx_thresh)).^a); K(K<=th)=0; K=K(:); @@ -88,11 +88,11 @@ epsilon2=epsilon(~below_thresh); disp(['Next iteration: k= ' num2str(k_knn)]) [idx2, kdist2]=knnsearch(data_pc,data_pc2,'k',k_knn,'Distance',distfun); - + % Find the points that have large enough distance below_thresh2=kdist2(:,end)>=bth*epsilon2; idx_thresh2=find(below_thresh2); - + if ~isempty(idx_thresh2) K2=exp(-(kdist2(idx_thresh2,:)./epsilon2(idx_thresh2)).^a); K2(K2<=th)=0; @@ -101,7 +101,7 @@ i2=i2(:); idx_temp=idx2(idx_thresh2,:); j2=idx_temp(:); - + i=[i; i2]; j=[j; j2]; K=[K; K2(:)]; @@ -126,7 +126,7 @@ K2(K2<=th)=0; K=[K; K2(:)]; end - + end % Build the kernel @@ -135,4 +135,3 @@ disp ' Symmetrize affinities' K = K + K'; disp ' Done computing kernel' - diff --git a/matlab/compute_optimal_t.m b/matlab/compute_optimal_t.m index c5d66322..b58f3ba5 100644 --- a/matlab/compute_optimal_t.m +++ b/matlab/compute_optimal_t.m @@ -32,7 +32,7 @@ data_prev = data_curr; end t_opt = find(error_vec < th, 1, 'first'); - + figure; hold all; plot(1:t_max, error_vec, '*-'); @@ -60,12 +60,3 @@ end disp(['optimal t = ' num2str(t_opt)]); - - - - - - - - - diff --git a/matlab/load_10x.m b/matlab/load_10x.m index 75e95d54..2b5d8e70 100644 --- a/matlab/load_10x.m +++ b/matlab/load_10x.m @@ -10,7 +10,7 @@ if isempty(data_dir) data_dir = './'; elseif data_dir(end) ~= '/' - data_dir = [data_dir '/']; + data_dir = [data_dir '/']; end for i=1:length(varargin)-1 @@ -38,7 +38,7 @@ dataMatrix_cells = table2cell( ... readtable(filename_cells, ... 
'FileType','text','ReadVariableNames',0)); - + % Remove empty cells col_keep = any(dataMatrix,1); dataMatrix = dataMatrix(:,col_keep); diff --git a/matlab/mmread.m b/matlab/mmread.m index de60c0bc..add56fad 100644 --- a/matlab/mmread.m +++ b/matlab/mmread.m @@ -11,7 +11,7 @@ % 'array' (dense array storage). The data will be duplicated % as appropriate if symmetry is indicated in the header. % -% Optionally, size information about the matrix can be +% Optionally, size information about the matrix can be % obtained by using the return values rows, cols, and % entries, where entries is the number of nonzero entries % in the final matrix. Type information can also be retrieved @@ -31,9 +31,9 @@ end % NOTE: If using a version of Matlab for which strtok is not -% defined, substitute 'gettok' for 'strtok' in the +% defined, substitute 'gettok' for 'strtok' in the % following lines, and download gettok.m from the -% Matrix Market site. +% Matrix Market site. [head0,header] = strtok(header); % see note above [head1,header] = strtok(header); [rep,header] = strtok(header); @@ -44,7 +44,7 @@ field = lower(field); symm = lower(symm); if ( length(symm) == 0 ) - disp(['Not enough words in header line of file ',filename]) + disp(['Not enough words in header line of file ',filename]) disp('Recognized format: ') disp('%%MatrixMarket matrix representation field symmetry') error('Check header line.') @@ -69,7 +69,7 @@ % Read size information, then branch according to % sparse or dense format -if ( strcmp(rep,'coordinate')) % read matrix given in sparse +if ( strcmp(rep,'coordinate')) % read matrix given in sparse % coordinate matrix format [sizeinfo,count] = sscanf(commentline,'%d%d%d'); @@ -86,9 +86,9 @@ rows = sizeinfo(1); cols = sizeinfo(2); entries = sizeinfo(3); - + if ( strcmp(field,'real') || strcmp(field,'integer') ) % real valued entries: - + [T,count] = fscanf(mmfile,'%f',3); T = [T; fscanf(mmfile,'%f')]; if ( size(T) ~= 3*entries ) @@ -100,9 +100,9 @@ end T = 
reshape(T,3,entries)'; A = sparse(T(:,1), T(:,2), T(:,3), rows , cols); - + elseif ( strcmp(field,'complex')) % complex valued entries: - + T = fscanf(mmfile,'%f',4); T = [T; fscanf(mmfile,'%f')]; if ( size(T) ~= 4*entries ) @@ -114,9 +114,9 @@ end T = reshape(T,4,entries)'; A = sparse(T(:,1), T(:,2), T(:,3) + T(:,4)*sqrt(-1), rows , cols); - + elseif ( strcmp(field,'pattern')) % pattern matrix (no values given): - + T = fscanf(mmfile,'%f',2); T = [T; fscanf(mmfile,'%f')]; if ( size(T) ~= 2*entries ) @@ -131,7 +131,7 @@ end -elseif ( strcmp(rep,'array') ) % read matrix given in dense +elseif ( strcmp(rep,'array') ) % read matrix given in dense % array (column major) format [sizeinfo,count] = sscanf(commentline,'%d%d'); @@ -151,7 +151,7 @@ if ( strcmp(field,'real') || strcmp(field,'integer') ) % real valued entries: A = fscanf(mmfile,'%f',1); A = [A; fscanf(mmfile,'%f')]; - if ( strcmp(symm,'symmetric') | strcmp(symm,'hermitian') | strcmp(symm,'skew-symmetric') ) + if ( strcmp(symm,'symmetric') | strcmp(symm,'hermitian') | strcmp(symm,'skew-symmetric') ) for j=1:cols-1, currenti = j*rows; A = [A(1:currenti); zeros(j,1);A(currenti+1:length(A))]; @@ -176,7 +176,7 @@ tmpi = fscanf(mmfile,'%f',1); A = [A; tmpr + tmpi*i]; end - if ( strcmp(symm,'symmetric') | strcmp(symm,'hermitian') | strcmp(symm,'skew-symmetric') ) + if ( strcmp(symm,'symmetric') | strcmp(symm,'hermitian') | strcmp(symm,'skew-symmetric') ) for j=1:cols-1, currenti = j*rows; A = [A(1:currenti); zeros(j,1);A(currenti+1:length(A))]; @@ -219,4 +219,3 @@ fclose(mmfile); % Done. 
- diff --git a/matlab/svdpca.m b/matlab/svdpca.m index 727b2e18..5797148c 100644 --- a/matlab/svdpca.m +++ b/matlab/svdpca.m @@ -24,4 +24,3 @@ [U,S,V] = randPCA(X, k); Y = U*S*V'; end - diff --git a/matlab/svdpca_sparse.m b/matlab/svdpca_sparse.m index 2172276e..ed8b2375 100644 --- a/matlab/svdpca_sparse.m +++ b/matlab/svdpca_sparse.m @@ -18,4 +18,3 @@ disp 'No PCA performed' pc = X; end - diff --git a/matlab/test_magic.m b/matlab/test_magic.m index db9f4559..42aa6eb5 100644 --- a/matlab/test_magic.m +++ b/matlab/test_magic.m @@ -115,4 +115,3 @@ ylabel(h,plot_genes{4}); view([-50 22]); title 'After MAGIC' - diff --git a/python/README.rst b/python/README.rst index 11e4f333..40ac954d 100644 --- a/python/README.rst +++ b/python/README.rst @@ -8,9 +8,9 @@ Markov Affinity-based Graph Imputation of Cells (MAGIC) .. image:: https://img.shields.io/cran/v/Rmagic.svg :target: https://cran.r-project.org/package=Rmagic :alt: Latest CRAN version -.. image:: https://api.travis-ci.com/KrishnaswamyLab/MAGIC.svg?branch=master - :target: https://travis-ci.com/KrishnaswamyLab/MAGIC - :alt: Travis CI Build +.. image:: https://img.shields.io/github/workflow/status/KrishnaswamyLab/MAGIC/Unit%20Tests/master?label=Github%20Actions + :target: https://github.com/KrishnaswamyLab/MAGIC/actions + :alt: GitHub Actions Build .. image:: https://img.shields.io/readthedocs/magic.svg :target: https://magic.readthedocs.io/ :alt: Read the Docs diff --git a/python/doc/Makefile b/python/doc/Makefile index acdb12ed..c5965533 100644 --- a/python/doc/Makefile +++ b/python/doc/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst index 66b6e079..b17b7461 100644 --- a/python/doc/source/api.rst +++ b/python/doc/source/api.rst @@ -15,4 +15,4 @@ Plotting .. automodule:: magic.plot :members: :inherited-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: diff --git a/python/doc/source/index.rst b/python/doc/source/index.rst index b22df14a..51873a7a 100644 --- a/python/doc/source/index.rst +++ b/python/doc/source/index.rst @@ -12,7 +12,7 @@ MAGIC - Markov Affinity-based Graph Imputation of Cells .. raw:: html - Travis CI Build + GitHub Actions Build .. raw:: html diff --git a/python/doc/source/tutorial.rst b/python/doc/source/tutorial.rst index 94d500b8..361713c8 100644 --- a/python/doc/source/tutorial.rst +++ b/python/doc/source/tutorial.rst @@ -23,4 +23,4 @@ A second tutorial analyzing myeloid and erythroid cells in mouse bone marrow is .. 
_here: http://nbviewer.jupyter.org/github/KrishnaswamyLab/magic/blob/master/python/tutorial_notebooks/bonemarrow_tutorial.ipynb -__ here_ \ No newline at end of file +__ here_ diff --git a/python/magic/__init__.py b/python/magic/__init__.py index 0d129c76..a8e8b6a7 100644 --- a/python/magic/__init__.py +++ b/python/magic/__init__.py @@ -1,8 +1,4 @@ -from __future__ import absolute_import - from .magic import MAGIC from .version import __version__ -import magic.io -import magic.preprocessing import magic.plot diff --git a/python/magic/io.py b/python/magic/io.py deleted file mode 100644 index aadbf6df..00000000 --- a/python/magic/io.py +++ /dev/null @@ -1,115 +0,0 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - -from __future__ import print_function, division -import warnings -import scprep - - -def load_csv( - filename, - cell_axis="row", - delimiter=",", - gene_names=True, - cell_names=True, - sparse=False, - **kwargs -): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_tsv( - filename, - cell_axis="row", - delimiter="\t", - gene_names=True, - cell_names=True, - sparse=False, - **kwargs -): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_fcs( - filename, - gene_names=True, - cell_names=True, - sparse=None, - metadata_channels=[ - "Time", - "Event_length", - "DNA1", - "DNA2", - "Cisplatin", - "beadDist", - "bead1", - ], -): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. 
" - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_mtx(mtx_file, cell_axis="row", gene_names=None, cell_names=None, sparse=None): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_10X(data_dir, sparse=True, gene_labels="symbol", allow_duplicates=None): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_10X_zip(filename, sparse=True, gene_labels="symbol", allow_duplicates=None): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) - - -def load_10X_HDF5( - filename, genome=None, sparse=True, gene_labels="symbol", allow_duplicates=None -): - """magic.io is deprecated. Please use scprep.io instead. - Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.io is deprecated. Please use scprep.io instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) diff --git a/python/magic/magic.py b/python/magic/magic.py index 65c02bcb..3272accb 100644 --- a/python/magic/magic.py +++ b/python/magic/magic.py @@ -6,28 +6,21 @@ (C) 2018 Krishnaswamy Lab GPLv2 """ -from __future__ import print_function, division, absolute_import - -import numpy as np -import graphtools +from . 
import utils +from scipy import sparse +from scipy import spatial from sklearn.base import BaseEstimator -from sklearn.exceptions import NotFittedError from sklearn.decomposition import PCA -import warnings +from sklearn.exceptions import NotFittedError + +import graphtools import matplotlib.pyplot as plt -from scipy import sparse, spatial -import pandas as pd import numbers -import tasklogger +import numpy as np +import pandas as pd import scprep - -from . import utils - -try: - import anndata -except ImportError: - # anndata not installed - pass +import tasklogger +import warnings _logger = tasklogger.get_tasklogger("graphtools") @@ -91,10 +84,6 @@ class MAGIC(BaseEstimator): verbose : `int` or `boolean`, optional (default: 1) If `True` or `> 0`, print status messages - k : Deprecated for `knn` - - a : Deprecated for `decay` - Attributes ---------- @@ -154,23 +143,7 @@ def __init__( n_jobs=1, random_state=None, verbose=1, - k=None, - a=None, ): - if k is not None: - warnings.warn( - "Parameter `k` is deprecated and will be removed" - " in a future version. Use `knn` instead", - FutureWarning, - ) - knn = k - if a is not None: - warnings.warn( - "Parameter `a` is deprecated and will be removed" - " in a future version. 
Use `decay` instead", - FutureWarning, - ) - decay = a self.knn = knn self.knn_max = knn_max self.decay = decay @@ -223,7 +196,7 @@ def _check_params(self): """ utils.check_positive(knn=self.knn) utils.check_int(knn=self.knn, n_jobs=self.n_jobs) - # TODO: epsilon + # TODO(scottgigante): epsilon utils.check_if_not( None, utils.check_positive, @@ -319,10 +292,6 @@ def set_params(self, **params): verbose : `int` or `boolean`, optional (default: 1) If `True` or `> 0`, print status messages - k : Deprecated for `knn` - - a : Deprecated for `decay` - Returns ------- self @@ -336,24 +305,6 @@ def set_params(self, **params): del params["t"] # kernel parameters - if "k" in params and params["k"] != self.knn: - warnings.warn( - "Parameter `k` is deprecated and will be removed" - " in a future version. Use `knn` instead", - FutureWarning, - ) - self.knn = params["k"] - reset_kernel = True - del params["k"] - if "a" in params and params["a"] != self.decay: - warnings.warn( - "Parameter `a` is deprecated and will be removed" - " in a future version. 
Use `decay` instead", - FutureWarning, - ) - self.decay = params["a"] - reset_kernel = True - del params["a"] if "knn" in params and params["knn"] != self.knn: self.knn = params["knn"] reset_kernel = True @@ -498,11 +449,9 @@ def fit(self, X, graph=None): return self def _parse_genes(self, X, genes): - if ( - genes is None - and (sparse.issparse(X) or scprep.utils.is_sparse_dataframe(X)) - and np.prod(X.shape) > 5000 * 20000 - ): + X_sparse = sparse.issparse(X) or scprep.utils.is_sparse_dataframe(X) + X_large = np.prod(X.shape) > 5000 * 20000 + if genes is None and X_sparse and X_large: warnings.warn( "Returning imputed values for all genes on a ({} x " "{}) matrix will require approximately {:.2f}GB of " @@ -603,15 +552,15 @@ def transform(self, X=None, genes=None, t_max=20, plot_optimal_t=False, ax=None) store_result = True genes = self._parse_genes(X, genes) + if genes is None: + genes_is_short = False + else: + genes_is_short = len(genes) < self.graph.data_nu.shape[1] if isinstance(genes, str) and genes == "pca_only": # have to use PCA to return it solver = "approximate" - elif ( - genes is not None - and self.X_magic is None - and len(genes) < self.graph.data_nu.shape[1] - ): + elif self.X_magic is None and genes_is_short: # faster to skip PCA solver = "exact" store_result = False @@ -637,8 +586,8 @@ def transform(self, X=None, genes=None, t_max=20, plot_optimal_t=False, ax=None) _logger.warning( "Running MAGIC with `solver='exact'` on " "{}-dimensional data may take a long time. 
" - "Consider denoising specific genes with `genes=` or using " - "`solver='approximate'`.".format(X_input.shape[1]) + "Consider denoising specific genes with `genes=` " + "or using `solver='approximate'`.".format(X_input.shape[1]) ) X_magic = self._impute(X_input, t_max=t_max, plot=plot_optimal_t, ax=ax) if store_result: @@ -661,8 +610,7 @@ def transform(self, X=None, genes=None, t_max=20, plot_optimal_t=False, ax=None) return X_magic def fit_transform(self, X, graph=None, **kwargs): - """Computes the diffusion operator and the position of the cells in the - embedding space + """Computes the diffusion operator and the denoised gene expression Parameters ---------- diff --git a/python/magic/plot.py b/python/magic/plot.py index 136f778c..88403f8e 100644 --- a/python/magic/plot.py +++ b/python/magic/plot.py @@ -1,16 +1,15 @@ -# author: Scott Gigante # (C) 2017 Krishnaswamy Lab GPLv2 +from .magic import MAGIC +from .utils import in_ipynb +from matplotlib import animation +from matplotlib import rc + import matplotlib.pyplot as plt -import pandas as pd -import numpy as np -from matplotlib import rc, animation import numbers +import numpy as np +import pandas as pd import scprep -from scipy import sparse - -from .magic import MAGIC -from .utils import in_ipynb def _validate_gene(gene, data): @@ -44,7 +43,7 @@ def animate_magic( dpi=100, ipython_html="jshtml", verbose=False, - **kwargs + **kwargs, ): """Animate a gene-gene relationship with increased diffusion diff --git a/python/magic/preprocessing.py b/python/magic/preprocessing.py deleted file mode 100644 index a16e87b0..00000000 --- a/python/magic/preprocessing.py +++ /dev/null @@ -1,17 +0,0 @@ -# author: Daniel Burkhardt -# (C) 2017 Krishnaswamy Lab GPLv2 - -from __future__ import print_function, division -import warnings -import scprep - - -def library_size_normalize(data, verbose=False): - """magic.preprocessing is deprecated. Please use scprep.normalize instead. 
- Read more at http://scprep.readthedocs.io/ - """ - raise RuntimeError( - "magic.preprocessing is deprecated. Please use scprep.normalize instead. " - "Read more at http://scprep.readthedocs.io", - FutureWarning, - ) diff --git a/python/magic/utils.py b/python/magic/utils.py index 69a74a39..e045c663 100644 --- a/python/magic/utils.py +++ b/python/magic/utils.py @@ -1,8 +1,9 @@ +from scipy import sparse + import numbers import numpy as np import pandas as pd import scprep -from scipy import sparse try: import anndata @@ -12,7 +13,7 @@ def check_positive(**params): - """Check that parameters are positive as expected + """Check that parameters are positive as expected. Raises ------ @@ -24,7 +25,7 @@ def check_positive(**params): def check_int(**params): - """Check that parameters are integers as expected + """Check that parameters are integers as expected. Raises ------ @@ -36,7 +37,7 @@ def check_int(**params): def check_if_not(x, *checks, **params): - """Run checks only if parameters are not equal to a specified value + """Run checks only if parameters are not equal to a specified value. Parameters ---------- @@ -60,7 +61,7 @@ def check_if_not(x, *checks, **params): def check_in(choices, **params): - """Checks parameters are in a list of allowed parameters + """Checks parameters are in a list of allowed parameters. Parameters ---------- @@ -84,7 +85,7 @@ def check_in(choices, **params): def check_between(v_min, v_max, **params): - """Checks parameters are in a specified range + """Checks parameters are in a specified range. 
Parameters ---------- @@ -109,9 +110,7 @@ def check_between(v_min, v_max, **params): def matrix_is_equivalent(X, Y): - """ - Checks matrix equivalence with numpy, scipy and pandas - """ + """Check matrix equivalence with numpy, scipy and pandas.""" if X is Y: return True elif X.shape == Y.shape: @@ -131,6 +130,7 @@ def matrix_is_equivalent(X, Y): def convert_to_same_format(data, target_data, columns=None, prevent_sparse=False): + """Convert data to same format as target data.""" # create new data object if scprep.utils.is_sparse_dataframe(target_data): if prevent_sparse: @@ -173,7 +173,7 @@ def convert_to_same_format(data, target_data, columns=None, prevent_sparse=False def in_ipynb(): - """Check if we are running in a Jupyter Notebook + """Check if we are running in a Jupyter Notebook. Credit to https://stackoverflow.com/a/24937408/3996580 """ @@ -188,6 +188,7 @@ def in_ipynb(): def is_anndata(data): + """Check if an object is an AnnData object.""" try: return isinstance(data, anndata.AnnData) except NameError: @@ -196,6 +197,7 @@ def is_anndata(data): def has_empty_columns(data): + """Check if an object has empty columns.""" try: return np.any(np.array(data.sum(0)) == 0) except AttributeError: diff --git a/python/magic/version.py b/python/magic/version.py index f6bb6f4d..528787cf 100644 --- a/python/magic/version.py +++ b/python/magic/version.py @@ -1 +1 @@ -__version__ = "2.0.4" +__version__ = "3.0.0" diff --git a/python/setup.py b/python/setup.py index 9535e4a8..6f41e995 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,7 +1,8 @@ -import os -import sys +from setuptools import find_packages from setuptools import setup +import os + install_requires = [ "numpy>=1.14.0", "scipy>=1.1.0", @@ -14,23 +15,13 @@ "scprep>=1.0", ] -test_requires = [ - "nose2", -] - -if sys.version_info[0] == 3: - test_requires += ["anndata"] +test_requires = ["nose2", "anndata", "coverage", "coveralls"] doc_requires = [ "sphinx", "sphinxcontrib-napoleon", ] -if sys.version_info[:2] 
< (3, 5): - raise RuntimeError("Python version >=3.5 required.") -elif sys.version_info[:2] >= (3, 6): - test_requires += ["black"] - version_py = os.path.join(os.path.dirname(__file__), "magic", "version.py") version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() @@ -42,10 +33,9 @@ description="MAGIC", author="", author_email="", - packages=[ - "magic", - ], + packages=find_packages(), license="GNU General Public License Version 2", + python_requires=">=3.6", install_requires=install_requires, extras_require={"test": test_requires, "doc": doc_requires}, test_suite="nose2.collector.collector", diff --git a/python/test/test.py b/python/test/test.py index cb5981b2..ff023ea0 100644 --- a/python/test/test.py +++ b/python/test/test.py @@ -1,21 +1,20 @@ #!/usr/bin/env python -from __future__ import print_function, division, absolute_import -import matplotlib as mpl - -mpl.use("agg") import magic +import matplotlib as mpl import numpy as np +import os import scprep +mpl.use("agg") + try: import anndata except (ImportError, SyntaxError): # anndata not installed pass -import os data_path = os.path.join("..", "data", "test_data.csv") if not os.path.isfile(data_path): diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..ec27580c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[flake8] +ignore = + # top-level module docstring + D100, D104, + # space before : conflicts with black + E203 +per-file-ignores = + # imported but unused + __init__.py: F401 + # missing docstring in public function for methods, metrics, datasets + openproblems/tasks/*/*/*.py: D103, E203 + openproblems/tasks/*/*/__init__.py: F401, D103 +max-line-length = 88 +exclude = + .git, + __pycache__, + build, + dist, + Snakefile + +[isort] +profile = black +force_single_line = true +force_alphabetical_sort = true