diff --git a/.gitignore b/.gitignore index 36389d62..fce513ba 100644 --- a/.gitignore +++ b/.gitignore @@ -23,7 +23,6 @@ website/**/* .cache/ .idea/ **/.ipynb_checkpoints/ -.idea/ .vscode .coverage* coverage.xml @@ -31,11 +30,15 @@ coverage.xml test/data/maelstrom/activity* test/data/maelstrom/final* test/data/maelstrom/*png +test/data/maelstrom/*pfm +test/data/maelstrom/*txt* test/data/maelstrom/motif.freq.txt test/data/maelstrom/gimme.vertebrate.v3.1.motif2factors.txt test/data/maelstrom/gimme.vertebrate.v3.1.pwm test/data/maelstrom/input.table.txt -test/**/*fai +test/**/*.fa.fai +test/**/*.fa.sizes +test/**/*.gaps.bed tmp/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0976b100..32ee7365 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,18 @@ repos: + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + - repo: https://github.com/ambv/black - rev: 21.5b1 + rev: 22.3.0 hooks: - id: black + - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + rev: 4.0.1 hooks: - id: flake8 + additional_dependencies: [ + 'flake8-bugbear==22.4.25', + ] diff --git a/.rtd-environment.yml b/.rtd-environment.yml index ff9bfc26..a048a51e 100644 --- a/.rtd-environment.yml +++ b/.rtd-environment.yml @@ -1,37 +1,34 @@ -name: gimmemotifs channels: - - defaults - - bioconda - conda-forge + - bioconda + - defaults dependencies: - - biofluff + # docs specific packages + - sphinx_bootstrap_theme + + # gimme packages (without the motif discovery tools) - configparser - - diskcache - - feather-format - - genomepy >=0.8.3 + - conda-forge::diskcache + - conda-forge::feather-format + - bioconda::genomepy >=0.11.1 + - ipywidgets # Necessary for progress bar in Jupyter notebook + - conda-forge::iteround - jinja2 - - logomaker + - bioconda::logomaker + - loguru - matplotlib-base >=3.1.2 - - ncurses - - numpy - - pandas >=1.0.3 - - pillow - - pip + - numpy >= 1.6.0 + - pandas >=1.0.3, <=1.1.5 # 1.3.5/1.4.2 are bugged - pyarrow 
>=0.16.0 - - pybedtools - - pybigwig - - python >=3 - - pyyaml >=3.10 - - qnorm - - represent - - scikit-learn >=0.18 - - scipy >=1.3.0 - - seaborn + - bioconda::pybedtools >=0.9.0 + - bioconda::pysam >=0.16 + - python >=3.8 + - python-xxhash + - conda-forge::qnorm >=0.8.1 + - scikit-learn >=0.23.2 + - scipy >=1.4.1 + - seaborn >=0.10.1 - statsmodels - - tqdm >=4.27.0 + - tqdm >=4.46.1 - xdg - - xgboost >=0.71 - - sphinx_bootstrap_theme - - numpydoc - - pip: - - xxhash + - xgboost >=1.0.2 diff --git a/.travis.yml b/.travis.yml index cf5efbbd..bc2fedf1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,15 +4,9 @@ branches: - master - develop -matrix: - allow_failures: - - os: osx - include: - - os: osx - language: generic - - os: linux - language: python - python: "3.6" +os: linux +dist: focal +language: minimal env: global: @@ -20,48 +14,31 @@ env: before_install: # setup miniconda - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; - fi + - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - chmod +x miniconda.sh - ./miniconda.sh -b -p $HOME/miniconda -f - - source "$HOME/miniconda/etc/profile.d/conda.sh" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - - conda info -a - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then ulimit -S -n 4096; ulimit -a; fi + - export PATH=$HOME/miniconda/bin:$PATH; + - conda config --set always_yes True install: - - conda config --add channels defaults - - conda config --add channels bioconda - - conda config --add channels conda-forge - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - conda install mamba -y; - mamba env create -q -f conda_env.dev.txt -n gimme; - else - conda env create -q -f conda_env.osx.txt -n gimme; - fi - - conda activate gimme - - conda list - - 
python setup.py build && pip install -e . + - conda install conda-forge::mamba + - mamba env create -f requirements.yaml + - source activate gimme + - python setup.py build # installs the motif discovery tools + - pip install -e . # installs gimme (in editable mode) before_script: # install Code Climate test coverage reporter - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget -O cc-test-reporter https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64; - else - wget -O cc-test-reporter https://codeclimate.com/downloads/test-reporter/test-reporter-latest-darwin-amd64; - fi + - wget -O cc-test-reporter https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 - chmod +x ./cc-test-reporter - ./cc-test-reporter before-build script: + # generate the config and print the cache location - python -c 'from gimmemotifs.config import CACHE_DIR;print(CACHE_DIR)' - - pytest -vv --disable-pytest-warnings --cov=scripts --cov=gimmemotifs --cov-report=xml test/ + # run the tests + - pytest -vvv --disable-pytest-warnings --cov=scripts --cov=gimmemotifs --cov-report=xml test/ after_script: # upload test coverage data to Code Climate - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ./cc-test-reporter after-build -t coverage.py --exit-code $TRAVIS_TEST_RESULT; fi + - ./cc-test-reporter after-build -t coverage.py --exit-code $TRAVIS_TEST_RESULT diff --git a/CHANGELOG.md b/CHANGELOG.md index ddd500d2..ac7cb839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,32 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- `requirements.yaml` contains all conda dependencies. + - packages available from one channel have been pinned (for solving speed) + - packages have minimum versions where known (for solving speed) + ### Changed +- alphabetized tools everywhere (how could you live like that!?) 
+- updated `setup.py` +- updated installation instructions + ### Fixed +- Yamda is now recognized in the config +- most tools work with the editable installation again +- all tests work for unix + - there were still some flakey values, where randomness is involved. +- background.py updated to work with the specified minimum `genomepy` version +- all `sphinx-build docs build` warnings + ### Removed +- a bunch of redundant requirement files. +- OSX tests. Possibly temporary. + - The tests haven't been working for ages, so I have no idea where to begin. + - and Travis asks 5x credits for OSX machines... + ## [0.17.0] - 2021-12-22 diff --git a/MANIFEST.in b/MANIFEST.in index 30c8b417..1bfcd51f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,24 +1,16 @@ -include README.md -include INSTALL -include COPYING include CHANGELOG.md -include run_tests.py +include CITATION.cff +include COPYING +include INSTALL include compile_externals.py -recursive-include src * -recursive-include test/data/ * -include test/*.py -include test/data/*/* -include scripts/gimme -include scripts/coverage_table -include scripts/combine_peaks +include README.md +prune test/ graft data/ -recursive-exclude data/examples/ * -include data/examples/TAp73alpha.bed -include data/examples/TAp73alpha.fa -include data/examples/test.small.fa -include data/examples/example.pfm -include data/examples/Gm12878.CTCF.top500.w200.fa -include data/examples/MA0099.3.jaspar +graft src/ +graft scripts/ +exclude scripts/download_motif_databases.py +exclude scripts/gimme_benchmark + include versioneer.py include gimmemotifs/_version.py diff --git a/conda_env.dev.txt b/conda_env.dev.txt deleted file mode 100644 index 6551b336..00000000 --- a/conda_env.dev.txt +++ /dev/null @@ -1,47 +0,0 @@ -bedtools -configparser -dinamo -diskcache -feather-format -gadem -genomepy >=0.8.3 -homer -ipywidgets # Necessary for progress bar in Jupyter notebook -jinja2 -logomaker -loguru -matplotlib >=2.0 -meme >=5 -ncurses -numpy -prosampler -pillow
-pyarrow >=0.16.0 -pybedtools -pysam -python -python-xxhash -pyyaml >=3.10 -qnorm -scikit-learn >=0.23 -scipy >=1.4.1 -seaborn -statsmodels -tqdm >=4.27.0 -trawler -ucsc-bigbedtobed -ucsc-genepredtobed -weeder -xdg -xgboost >=0.71 -xxmotif -iteround - -# development-specific -black=21.5b1 # same as in .pre-commit-config.yaml -flake8=3.9.2 # same as in .pre-commit-config.yaml -flake8-bugbear -pre-commit -pytest -pytest-cov -twine diff --git a/conda_env.test.txt b/conda_env.test.txt deleted file mode 100644 index 4883216e..00000000 --- a/conda_env.test.txt +++ /dev/null @@ -1,39 +0,0 @@ -bedtools -configparser -dinamo -diskcache -feather-format -gadem -genomepy >=0.6.1 -ghostscript -homer -icu=58 -ipywidgets # Necessary for progress bar in Jupyter notebook -jinja2 -logomaker -loguru -matplotlib >=2.0 -meme >=5 -ncurses -numpy -pillow -prosampler -pyarrow -pybedtools -python >=3.8 -python-xxhash -pyyaml >=3.10 -qnorm -scikit-learn >=0.18 -scipy <1.3.0 -seaborn -statsmodels -tqdm >=4.27.0 -trawler -ucsc-bigbedtobed -ucsc-genepredtobed -weeder -xdg -xgboost >=0.71 -xxmotif -iteround diff --git a/conda_env.txt b/conda_env.txt deleted file mode 100644 index c8cf3ab4..00000000 --- a/conda_env.txt +++ /dev/null @@ -1,38 +0,0 @@ -configparser -dinamo -diskcache -feather-format -gadem -genomepy >=0.8.3 -ghostscript -homer -ipywidgets # Necessary for progress bar in Jupyter notebook -jinja2 -logomaker -loguru -matplotlib-base >=3.1.2 -meme >=5.1.1 -ncurses -numpy -pandas >=1.0.3 -pillow -prosampler -pyarrow >=0.16.0 -pybedtools -pysam -python >=3 -python-xxhash -pyyaml >=3.10 -qnorm -scikit-learn >=0.18 -scipy <1.3.0 -seaborn -statsmodels -tqdm >=4.27.0 -trawler -ucsc-bigbedtobed -weeder -xdg -xgboost >=0.71 -xxmotif -iteround diff --git a/data/cfg/gimmemotifs.default.cfg b/data/cfg/gimmemotifs.default.cfg index 67f0744e..88cf99d2 100644 --- a/data/cfg/gimmemotifs.default.cfg +++ b/data/cfg/gimmemotifs.default.cfg @@ -17,8 +17,8 @@ lsize = 500 background = gc,random 
cluster_threshold = 0.95 scan_cutoff = 0.9 -available_tools = MDmodule,MEME,Weeder,GADEM,MotifSampler,Trawler,Improbizer,BioProspector,Posmo,ChIPMunk,AMD,Homer,ProSampler,YAMDA,DiNAMO,RPMCMC,DREME -tools = MEME,BioProspector,Homer +available_tools = AMD,BioProspector,ChIPMunk,DiNAMO,DREME,GADEM,HMS,Homer,Improbizer,MDmodule,MEME,MEMEW,MotifSampler,Posmo,ProSampler,RPMCMC,Trawler,Weeder,XXmotif,YAMDA +tools = BioProspector,Homer,MEME pvalue = 0.001 max_time = -1 ncpus = 12 @@ -26,52 +26,68 @@ motif_db = gimme.vertebrate.v5.0.pfm use_cache = False -[YAMDA] -bin = run_em.py +[AMD] +bin = AMD.bin +dir = included_tools/ + +[BioProspector] +bin = BioProspector dir = included_tools/ +[ChIPMunk] +bin = ChIPMunk.sh +dir = included_tools/ChIPMunk + [DiNAMO] bin = dinamo dir = included_tools/ -[RPMCMC] -bin = multi_motif_finder +[DREME] +bin = dreme-py3 dir = included_tools/ -[ProSampler] -bin = ProSampler +[GADEM] +bin = gadem dir = included_tools/ -[AMD] -bin = AMD.bin +[HMS] +bin = hms +dir = included_tools/HMS + +[Homer] +bin = homer2 dir = included_tools/ -[MEME] -bin = meme +[Improbizer] +bin = ameme dir = included_tools/ -[DREME] -bin = dreme-py3 +[MDmodule] +bin = MDmodule +dir = included_tools/ + +[MEME] +bin = meme dir = included_tools/ [MEMEW] bin = meme dir = included_tools/ -[MDmodule] -bin = MDmodule +[MotifSampler] +bin = MotifSampler dir = included_tools/ -[Improbizer] -bin = ameme +[Posmo] +bin = posmo dir = included_tools/ -[MotifSampler] -bin = MotifSampler +[ProSampler] +bin = ProSampler dir = included_tools/ -[GADEM] -bin = gadem +[RPMCMC] +bin = multi_motif_finder dir = included_tools/ # This section works with trawler as installed by bioconda. 
@@ -88,26 +104,10 @@ dir = included_tools/ bin = weeder2 dir = included_tools/ -[BioProspector] -bin = BioProspector -dir = included_tools/ - -[ChIPMunk] -bin = ChIPMunk.sh -dir = included_tools/ChIPMunk - -[Homer] -bin = homer2 -dir = included_tools/ - [XXmotif] bin = XXmotif dir = included_tools/ -[Posmo] -bin = posmo +[Yamda] +bin = run_em.py dir = included_tools/ - -[HMS] -bin = hms -dir = included_tools/HMS diff --git a/docs/Makefile b/docs/Makefile index df776b41..bbf99dd8 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,153 +1,20 @@ -# Makefile for Sphinx documentation +# Minimal makefile for Sphinx documentation # -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = docs BUILDDIR = _build -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext - +# Put it first so that "make" without argument is like "make help". 
help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." 
- -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GimmeMotifs.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GimmeMotifs.qhc" - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/GimmeMotifs" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GimmeMotifs" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
- @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." +.PHONY: help Makefile -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst index 9e13ce93..6b2b7750 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -3,10 +3,8 @@ API documentation ================= -.. toctree:: - :maxdepth: 2 - - api +.. contents:: + :depth: 2 Examples ======== @@ -32,7 +30,7 @@ There are several ways to create a Motif instance. for motif in motifs: print(motif) -:: +.. code-block:: python AP1_nTGAGTCAy CTCF_CCAsyAGrkGGCr @@ -44,7 +42,7 @@ There are several ways to create a Motif instance. m.id = "CpG" print(m) -:: +.. code-block:: python CpG_CG @@ -101,7 +99,7 @@ You can convert a motif to several formats. 
# pfm print(motifs[0].to_pfm()) -:: +.. code-block:: python >AP1 555.8 146.9 273.4 24.0 @@ -119,7 +117,7 @@ You can convert a motif to several formats. # consensus sequence print(motifs[0].to_consensus()) -:: +.. code-block:: python ATGAsTCAy @@ -128,7 +126,7 @@ You can convert a motif to several formats. # TRANSFAC print(motifs[0].to_transfac()) -:: +.. code-block:: python DE AP1 unknown 0 555 146 273 24 A @@ -147,7 +145,7 @@ You can convert a motif to several formats. # MEME print(motifs[0].to_meme()) -:: +.. code-block:: python MOTIF AP1 BL MOTIF AP1 width=0 seqs=0 @@ -170,7 +168,7 @@ Some other useful tidbits. m = motif_from_consensus("NTGASTCAN") print(len(m)) -:: +.. code-block:: python 9 @@ -181,7 +179,7 @@ Some other useful tidbits. m.trim(0.5) print(m.to_consensus(), len(m)) -:: +.. code-block:: python TGAsTCA 7 @@ -190,7 +188,7 @@ Some other useful tidbits. # Slices print(m[:3].to_consensus()) -:: +.. code-block:: python TGA @@ -200,7 +198,7 @@ Some other useful tidbits. random_motif = motif_from_consensus("NTGASTGAN").randomize() print(random_motif) -:: +.. code-block:: python random_snCTAGTAn @@ -242,7 +240,7 @@ Now we can use this file for scanning. m.pwm_scan(f) -:: +.. code-block:: python {'seq1': [], 'seq2': [6, 6], 'seq3': [0, 16, 0, 16]} @@ -255,7 +253,7 @@ This is more clear when we use ``pwm_scan_all()`` that returns position, score a m.pwm_scan_all(f) -:: +.. code-block:: python {'seq1': [], 'seq2': [(6, 9.02922042678255, 1), (6, 9.02922042678255, -1)], @@ -271,7 +269,7 @@ Use ``scan_rc=False`` to only scan the forward orientation. m.pwm_scan_all(f, nreport=1, scan_rc=False) -:: +.. code-block:: python {'seq1': [], 'seq2': [(6, 9.02922042678255, 1)], @@ -299,7 +297,7 @@ If you only want the best match per sequence, is a utility function called ``sca for match in matches: print("{}\t{}\t{}".format(motif, match[1], match[0])) -:: +.. 
code-block:: python motif pos score CG 0 -18.26379789133924 @@ -346,7 +344,7 @@ Now let's get the best score for the CTCF motif for each sequence. np.max(scores) )) -:: +.. code-block:: python 500 11.00 1.45 15.07 @@ -366,7 +364,7 @@ This means that for the same combination of motifs and genome, the previously ge counts = [n[0] for n in s.count("Gm12878.CTCF.top500.w200.fa", nreport=1)] print(counts[:10]) -:: +.. code-block:: python [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] @@ -375,7 +373,7 @@ This means that for the same combination of motifs and genome, the previously ge # or the grand total of number of sequences with 1 match print(s.total_count("examples/Gm12878.CTCF.top500.w200.fa", nreport=1)) -:: +.. code-block:: python [404] @@ -389,7 +387,8 @@ This means that for the same combination of motifs and genome, the previously ge motif = motifs[m] for score, pos, strand in matches: print(seqname, motif, score, pos, strand) -:: + +.. code-block:: python chr11:190037-190237 C2H2_ZF_Average_200_CCAsyAGrkGGCr 13.4959558370929 143 -1 chr11:190037-190237 C2H2_ZF_Average_200_CCAsyAGrkGGCr 10.821440417077262 22 -1 @@ -449,7 +448,7 @@ For a basic example we'll just use two motif finders, as they're quick to run. motifs = gimme_motifs(peaks, outdir, params=params) -:: +.. code-block:: python 2017-06-30 07:37:00,079 - INFO - starting full motif analysis 2017-06-30 07:37:00,082 - INFO - preparing input (FASTA) @@ -513,7 +512,7 @@ Although the ``gimme_motifs()`` function is probably the easiest way to run the motifs, stdout, stderr = m.run("TAp73alpha.fa", params=params) print(motifs[0].to_consensus()) -:: +.. code-block:: python nnnCnTGynnnGrCwTGyyn @@ -551,7 +550,7 @@ Then we only select p53 motifs. print("Best motif (recall at 10% FDR):", best_motif) -:: +.. code-block:: python Stats for GM.5.0.p53.0001_rCATGyCCnGrCATGy recall_at_fdr 0.833 @@ -587,7 +586,7 @@ You can choose one or more specific metrics with the additional ``stats`` argume )) -:: +.. 
code-block:: python p53_M5923_1.01 roc_auc 0.63 p53_M5922_1.01 roc_auc 0.64 @@ -626,7 +625,7 @@ Compare two motifs. print(pad1 + m1.to_consensus()) print(pad2 + m2.to_consensus()) -:: +.. code-block:: python rrrCATGyyy ACAyGA @@ -664,7 +663,7 @@ Find closest match in a motif database. print(" " * padd + dbmotif.to_consensus()) print() -:: +.. code-block:: python GATA: AGATAASR_GATA3(Zf)/iTreg-Gata3-ChIP-Seq(GSE20898)/Homer - 0.823 GATA diff --git a/docs/conf.py b/docs/conf.py index e53c9859..e5783937 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,309 +1,66 @@ -# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. # -# GimmeMotifs documentation build configuration file, created by -# sphinx-quickstart on Tue Feb 18 16:54:27 2014. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. -import sphinx_bootstrap_theme -import sys, os -from unittest.mock import MagicMock - - -class Mock(MagicMock): - @classmethod - def __getattr__(cls, name): - return MagicMock() +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html - -MOCK_MODULES = [ -# "numpy", -# "pandas", -# "scipy", -# "scipy.cluster", -# "scipy.cluster.hierarchy", -# "scipy.spatial", -# "scipy.stats", -# "matplotlib", -# "matplotlib.axes", -# "matplotlib.cm", -# "matplotlib.colors", -# "matplotlib.gridspec", -# "matplotlib.patches", -# "matplotlib.pyplot", -# "matplotlib.textpath", - "pybedtools", - "gimmemotifs.c_metrics", - "statsmodels", - "statsmodels.sandbox.stats.multicomp", - "statsmodels.stats", - "statsmodels.stats.multitest", - "mpl_toolkits.axes_grid1", - "pymc", - "sklearn", - "sklearn.cluster", - "sklearn.ensemble", - "sklearn.linear_model", - "sklearn.feature_selection", - "sklearn.metrics", - "sklearn.model_selection", - "sklearn.multiclass", - "sklearn.multioutput", - "sklearn.svm", - "sklearn.pipeline", - "sklearn.preprocessing", - "sklearn-contrib-lightning", - "sklearn.metrics.pairwise", - "lightning.classification", - "lightning.regression", - "lightning", - "seaborn", - "pysam", - "xgboost", - "pillow", -] -sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) +# -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath("..")) -# -- General configuration ----------------------------------------------------- +import os +import sys +sys.path.insert(0, os.path.abspath('..')) +from gimmemotifs import __version__ # noqa -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.autosummary", "numpydoc"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] +# -- Project information ----------------------------------------------------- -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - -# General information about the project. -project = u"GimmeMotifs" -copyright = u"2019, Simon van Heeringen, licensed under CC BY 4.0" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -sys.path.append("../") -from gimmemotifs import __version__ +project = 'GimmeMotifs' +copyright = '2022, Simon van Heeringen, licensed under CC BY 4.0' +author = 'Simon van Heeringen, Siebren Frölich, Maarten van der Sande' +# Major, minor and hotfix versions version = __version__.split("+")[0] -# The full version, including alpha/beta/rc tags. +# The full version, including alpha/beta/rc tags release = __version__ -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ["_build"] +# -- General configuration --------------------------------------------------- -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. 
-# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', # automatic documentation from docstrings + 'sphinx.ext.coverage', # gather documentation coverage stats + 'sphinx.ext.napoleon', # recognize numpy & google style docstrings + 'sphinx.ext.autosummary', # Create neat summary tables + # 'numpydoc', +] -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -# -- Options for HTML output --------------------------------------------------- +# -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "bootstrap" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() - -# The name for this set of Sphinx documents. 
If None, it defaults to -# " v documentation". -# html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None +# +html_theme = "bootstrap" # 'alabaster' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {'**':['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'],} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
-# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = "GimmeMotifsdoc" - - -# -- Options for LaTeX output -------------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - #'preamble': '', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ( - "index", - "GimmeMotifs.tex", - u"GimmeMotifs Documentation", - u"Simon van Heeringen", - "manual", - ), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ("index", "gimmemotifs", u"GimmeMotifs Documentation", [u"Simon van Heeringen"], 1) -] - -# If true, show URL addresses after external links. 
-# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------------ - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - "index", - "GimmeMotifs", - u"GimmeMotifs Documentation", - u"Simon van Heeringen", - "GimmeMotifs", - "One line description of project.", - "Miscellaneous", - ), -] - -# Documents to append as an appendix to all manuals. -# texinfo_appendices = [] +html_static_path = ['_static'] -# If false, no module index is generated. -# texinfo_domain_indices = True -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' +# -- Extension configuration ------------------------------------------------- diff --git a/docs/faq.rst b/docs/faq.rst index 9121876c..179b3005 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -40,11 +40,11 @@ fix it by deleting the GimmeMotifs cache directory `~/.cache/gimmemotifs`. SQLite error when running on a cluster -------------------------------------- -The current implementation of the cache that GimmeMotifs uses does not play nice with concurrent access, for instance on - a cluster. The result is that the cache will get corrupted and that GimmeMotifs will fail. Until this is fixed, there i -s a workaround. In your job submission script, use something like the following: +The current implementation of the cache that GimmeMotifs uses does not play nice with concurrent access, for instance on a cluster. +The result is that the cache will get corrupted and that GimmeMotifs will fail. Until this is fixed, there is a workaround. +In your job submission script, use something like the following: -:: +.. code-block:: bash NEW_CACHE=$TMPDIR/cache mkdir -p $NEW_CACHE @@ -66,14 +66,14 @@ The recommended solution is to upgrade Ubuntu as Trusty is no longer supported. However, if you are unable to do so, there is a workaround. 
Run the following command: -:: +.. code-block:: bash $ export LD_PRELOAD=$CONDA_PREFIX/lib/libgomp.so Now you should be able to run `gimme` as normal. 'i' format requires -2147483648 <= number <= -2147483646 -------------------------------------------------------- +-------------------------------------------------------- If you get the following error with `gimme maelstrom`: @@ -117,13 +117,13 @@ The different methods use different ways to rank the motifs. The hypergeometric I have upgraded GimmeMotifs and now it doesn't find my genome ------------------------------------------------------------- -The genome index in GimmeMotifs has changed, see upgradegenome_. +The genome index in GimmeMotifs has changed, see :ref:`upgradegenome`. I cannot run gimme index anymore -------------------------------- -The genome index in GimmeMotifs has changed, see upgradegenome_. +The genome index in GimmeMotifs has changed, see :ref:`upgradegenome`. I get 'RuntimeError: Invalid DISPLAY variable' diff --git a/docs/index.rst b/docs/index.rst index 8246c166..9ac837fc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,7 +23,7 @@ If you find it useful please cite our paper: Getting started --------------- -* The easiest way to :ref:`install` GimmeMotifs is using bioconda_ on Linux or Mac. From version 0.13.0 only Python 3 (>= 3.4) is supported. +* The easiest way to :ref:`install` GimmeMotifs is using bioconda_ on Linux or Mac. From version 0.13.0 only Python 3 (>= 3.8) is supported. * Have a look at these :ref:`simple examples` to get a taste of what is possible. * Check out the more detailed :ref:`tutorials`. * Full command-line reference can be found :ref:`here`. @@ -36,7 +36,7 @@ Get help * The preferred way to get support is through the `Github issues`_ page. * Finally, you can reach me by mail_ or via twitter_. -.. _`Github issues`: https://github.com/simonvh/gimmemotifs/issues/ +.. _`Github issues`: https://github.com/vanheeringen-lab/gimmemotifs/issues .. 
_bioconda: https://bioconda.github.io/ .. _mail: simon.vanheeringen@gmail.com .. _twitter: https://twitter.com/svheeringen diff --git a/docs/installation.rst b/docs/installation.rst index d033ee52..37c51f4a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -2,22 +2,26 @@ Installation ============ GimmeMotifs runs on Linux. On Windows 10 it will run fine using the `Windows Subsystem for Linux`_. -Mac OSX should work and is included in the build test. -However, as I don't use it myself, unexpected issues might pop up. -Let me know, so I can try to fix it. + +.. NOTE: nope. it hasn't worked in a while. + Mac OSX should work and is included in the build test. + However, as I don't use it myself, unexpected issues might pop up. + Let me know, so I can try to fix it. .. _`Windows Subsystem for Linux`: https://docs.microsoft.com/en-us/windows/wsl/install-win10 .. _`Install GimmeMotifs`: -The easiest way to install --------------------------- +Conda - the easy way +-------------------- The preferred way to install GimmeMotifs is by using `conda `_. Activate the bioconda_ channel if you haven't used bioconda before. You only have to do this once. +.. _bioconda: https://bioconda.github.io/ + :: $ conda config --add channels defaults @@ -34,223 +38,217 @@ Or create a specific environment: :: - $ conda create -n gimme python=3 gimmemotifs + $ conda create -n gimme gimmemotifs # Activate the environment before you use GimmeMotifs $ conda activate gimme -GimmeMotifs only supports Python 3. Don't forget to activate the environment with ``source activate gimme`` whenever you want to use GimmeMotifs. - Installation successful? Good. Have a look at the :ref:`configuration` section. -.. _`upgradegenome`: +.. _upgradegenome: -Important note on upgrading from 0.11.1 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Upgrading from 0.11.1 +^^^^^^^^^^^^^^^^^^^^^ -The way genomes are installed and used has been changed from 0.11.1 to 0.12.0. 
-Basically, we have switched to the faidx index used and supported by many other tools. -This means that the old (<=0.11.1) GimmeMotifs index cannot be used by GimmeMotifs 0.12.0 and higher. +The way genomes are installed and used has been changed from 0.11.1 to 0.12.0. +Basically, we have switched to the faidx index used and supported by many other tools. +This means that the old (<=0.11.1) GimmeMotifs index cannot be used by GimmeMotifs 0.12.0 and higher. You can re-install genomes using genomepy_, which is now the preferred tool for genome management for GimmeMotifs. -However, because of this change you can now also directly supply a genome FASTA instead of a genome name. +However, because of this change you can now also directly supply a genome FASTA instead of a genome name. Pre-indexing is not required anymore. -.. _bioconda: https://bioconda.github.io/ -.. _genomepy: https://github.com/simonvh/genomepy +.. _genomepy: https://github.com/vanheeringen-lab/genomepy -Alternative installation ------------------------- +.. NOTE: abbreviated + Alternative installation + ------------------------ -Prerequisites -+++++++++++++ + Prerequisites + +++++++++++++ -These are the prerequisites for a full GimmeMotifs installation. + These are the prerequisites for a full GimmeMotifs installation. -- bedtools http://bedtools.readthedocs.io -- UCSC genePredToBed http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/genePredToBed -- UCSC bigBedToBed http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed -- Perl + Algorithm::Cluster + - bedtools http://bedtools.readthedocs.io + - UCSC genePredToBed http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/genePredToBed + - UCSC bigBedToBed http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed + - Perl + Algorithm::Cluster -In addition many of the motif tools (such as MEME) will need to be installed separately. Instructions for doing so are not included here. 
+ In addition many of the motif tools (such as MEME) will need to be installed separately. Instructions for doing so are not included here. -Using pip -+++++++++ +Pip +--- Installation from PyPI with ``pip`` is a relatively straightforward option. Install with pip as follows: -:: +:: - $ sudo pip install gimmemotifs + $ pip install gimmemotifs Or the (unstable) develop branch with the newest bells, whistles and bugs: :: - $ sudo pip install git+https://github.com/vanheeringen-lab/gimmemotifs.git@develop - -If you don't have root access, see the option below. + $ pip install git+https://github.com/vanheeringen-lab/gimmemotifs.git@develop -Using pip in a virtualenv -+++++++++++++++++++++++++ +Note that several dependencies and many of the motif tools (such as MEME) need to be installed separately. +Instructions for doing so are not included here. -Ubuntu prerequisites -~~~~~~~~~~~~~~~~~~~~ +.. NOTE: Lets keep it simple, with Conda, PIP or Source + If you don't have root access, see the option below. -To install GimmeMotifs in a virtualenv, several Python packages need to be built from source. + Using pip in a virtualenv + +++++++++++++++++++++++++ -Install the necessary packages to build numpy, scipy, matplotlib and GimmeMotifs: + Ubuntu prerequisites + ~~~~~~~~~~~~~~~~~~~~ -:: + To install GimmeMotifs in a virtualenv, several Python packages need to be built from source. - sudo apt-get install python-pip python-dev build-essential libatlas-base-dev \ - gfortran liblapack-dev libatlas-base-dev cython libpng12-dev libfreetype6-dev \ - libgsl0-dev + Install the necessary packages to build numpy, scipy, matplotlib and GimmeMotifs: -Install via pip -~~~~~~~~~~~~~~~ + :: -Create a virtualenv and activate it according to the -`documentation -`_. 
+ sudo apt-get install python-pip python-dev build-essential libatlas-base-dev \ + gfortran liblapack-dev libatlas-base-dev cython libpng12-dev libfreetype6-dev \ + libgsl0-dev -Install numpy: + Install via pip + ~~~~~~~~~~~~~~~ -:: + Create a virtualenv and activate it according to the + `documentation + `_. - $ pip install numpy + Install numpy: + :: -Now you can install GimmeMotifs using pip. Latest stable release: + $ pip install numpy -:: - $ pip install gimmemotifs + Now you can install GimmeMotifs using pip. Latest stable release: + :: -Installation from source -++++++++++++++++++++++++ + $ pip install gimmemotifs -Did I mention conda? -You know bioconda is amazing, right? +Source - developers install +--------------------------- -So... +Want to fix that darned bug yourself? +Want to try out the latest features? +Well look no further! +You can install the develop branch with the newest bells, whistles and bugs: -These instructions are not up-to-date! Basically, you're on your own! +:: -Make sure to install all required dependencies. + # download the gimmemotifs code + $ git clone https://github.com/vanheeringen-lab/gimmemotifs.git + $ cd gimmemotifs + $ git checkout develop -You can download the lastest stable version of GimmeMotifs at: + # setup the gimme conda environment + $ conda env create -f requirements.yaml + $ conda activate gimme + $ python setup.py build # installs the motif discovery tools + $ pip install -e . # installs gimmemotifs (in editable mode) -| https://github.com/simonvh/gimmemotifs/releases + # test if the install was successful + $ gimme -h -Start by unpacking the source archive +Once installed, you can edit the code in the `gimmemotifs` folder, and the changes are immediately active! +Check out how good your fixes are with unit tests: :: - tar xvzf gimmemotifs-0.11.0.tar.gz - cd gimmemotifs-0.11.0 - -You can build GimmeMotifs with the following command: + $ pytest -vvv --disable-pytest-warnings -:: - - python setup.py build +.. 
NOTE: I've replaced this with the editable install + Did I mention conda? -Run the tests to check if the basics work correctly: + You know bioconda is amazing, right? -:: + So... - python run_tests.py + These instructions are not up-to-date! Basically, you're on your own! -If you encounter no errors, go ahead with installing GimmeMotifs (root -privileges required): + Make sure to install all required dependencies. -:: + You can download the lastest stable version of GimmeMotifs at: - sudo python setup.py install + | https://github.com/simonvh/gimmemotifs/releases -On first run GimmeMotifs will try to locate the tools you have -installed. If you have recently installed them, running an ``updatedb`` -will be necessary. Using this option GimmeMotifs will create a -configuration file, the default is: + Start by unpacking the source archive -:: + :: - ~/.config/gimmemotifs/gimmemotifs.cfg + tar xvzf gimmemotifs-0.11.0.tar.gz + cd gimmemotifs-0.11.0 -This is a personal configuration file. + You can build GimmeMotifs with the following command: -It is also possible to run the ``setup.py install`` command with the -``--prefix``, ``--home``, or ``--install-data`` options, to install in -GimmeMotifs in a different location (for instance, in your own home -directory). This should be fine, however, these alternative methods of -installing GimmeMotifs have not been extensively tested. + :: -.. _configuration: + python setup.py build -Configuration -------------- + Run the tests to check if the basics work correctly: -Genomes -+++++++ + :: -You will need genome FASTA files for a lot of the tools that are included -with GimmeMotifs. + python run_tests.py -Download genomes automatically -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + If you encounter no errors, go ahead with installing GimmeMotifs (root + privileges required): -The most straightforward way to download and index a genome is to use -the ``genomepy`` tool, which is installed with GimmeMotifs. 
+ :: -:: + sudo python setup.py install - $ genomepy install hg19 UCSC --annotation + On first run GimmeMotifs will try to locate the tools you have + installed. If you have recently installed them, running an ``updatedb`` + will be necessary. Using this option GimmeMotifs will create a + configuration file, the default is: -Here, the hg19 genome and accompanying gene annotation will be downloaded -from UCSC to the directory ``~/.local/share/genomes/hg19``. -You can change this default location by creating/editing the file ``~/.config/genomepy/genomepy.yaml`` and change the following line: + :: -:: + ~/.config/gimmemotifs/gimmemotifs.cfg - genome_dir: /data/genomes + This is a personal configuration file. -Please note: in contrast to earlier versions of GimmeMotifs it is no longer necessary to index a genome. + It is also possible to run the ``setup.py install`` command with the + ``--prefix``, ``--home``, or ``--install-data`` options, to install in + GimmeMotifs in a different location (for instance, in your own home + directory). This should be fine, however, these alternative methods of + installing GimmeMotifs have not been extensively tested. -Adding gene files -~~~~~~~~~~~~~~~~~ - -Note: If you used the ``genomepy`` command, annotation will be included automatically. +.. _configuration: -For some applications a gene file is used. This is a file containing gene -annotation in BED12 format. It should be located in the ``gene_dir``, -which is defined in the configuration file (see below). -The file needs to be named ``.bed``, so for instance ``hg19.bed``. +Configuration +============= .. _`other_configuration`: -Other configuration options -+++++++++++++++++++++++++++ +The configuration file +---------------------- -All of GimmeMotifs configuration is stored in -``~/.config/gimmemotifs/gimmemotifs.cfg``. The -configuraton file is created at first run with all defaults set, -but you can always edit it afterwards. 
It contains two sections ``main`` -and ``params`` that take care of paths, file locations, parameter -settings etc. Additionally, every motif tool has it's own section. Let's -have a look at the options. +All of GimmeMotifs' configuration is stored in ``~/.config/gimmemotifs/gimmemotifs.cfg``. +The configuration file is created at first run with all defaults set, but you can always edit it afterwards. +It contains two sections ``main`` and ``params`` that take care of paths, file locations, parameter settings etc. +Additionally, every motif tool has its own section. +Let's have a look at the options. :: [main] - template_dir = /usr/share/gimmemotifs/templates - score_dir = /usr/share/gimmemotifs/score_dists - motif_databases = /usr/share/gimmemotifs/motif_databases - gene_dir = /usr/share/gimmemotifs/genes - tools_dir = /usr/share/gimmemotifs/tools + bg = bg + template_dir = templates + score_dir = score_dists + gene_dir = genes + motif_databases = motif_databases + tools = included_tools/ - ``template_dir`` The location of the jinja2 html templates, used to generate the reports. @@ -258,12 +256,12 @@ have a look at the options. - ``score_dir`` To generate p-values, a pre-calculated file with mean and sd of score distributions is needed. These are located here. -- ``motif_databases`` For now contains only the JASPAR motifs. - - ``gene_dir`` Directory with bed-files containing gene locations. This is needed to create promoter background sequences. -- ``tools_dir`` Here all tools included with GimmeMotifs are stored. +- ``motif_databases`` Contains various motif databases. + +- ``tools`` Here all tools included with GimmeMotifs are stored. :: @@ -271,29 +269,62 @@ have a look at the options.
fraction = 0.2 use_strand = False abs_max = 1000 - analysis = medium + analysis = xl enrichment = 1.5 - width = 200 - lwidth = 500 - genome = hg19 + size = 200 + lsize = 500 background = gc,random cluster_threshold = 0.95 - available_tools = MDmodule,MEME,Weeder,GADEM,MotifSampler,trawler,Improbizer,BioProspector,Posmo,ChIPMunk,JASPAR,AMD,HMS,Homer - tools = MDmodule,MEME,Weeder,MotifSampler,trawler,Improbizer,BioProspector,Posmo,ChIPMunk,JASPAR,AMD,HMS,Homer - pvalue = 0.001 - max_time = None - ncpus = 2 - motif_db = gimme.vertebrate.v3.1.pwm scan_cutoff = 0.9 + available_tools = AMD,BioProspector,ChIPMunk,DiNAMO,GADEM,HMS,Homer,Improbizer,MDmodule,MEME,MEMEW,MotifSampler,Posmo,ProSampler,Trawler,Weeder,XXmotif,Yamda + tools = BioProspector,Homer,MEME + pvalue = 0.001 + max_time = -1 + ncpus = 12 + motif_db = gimme.vertebrate.v5.0.pfm use_cache = False - markov_model = 1 - + This section specifies all the default GimmeMotifs parameters. Most of these can also be specified at the command-line when running -GimmeMotifs, in which case they will override the parameters specified +GimmeMotifs, in which case they will override the parameters specified here. + +Input Data +========== + +Genomes - and how to get them +----------------------------- + +You will need genome FASTA files for a lot of the tools that are included with GimmeMotifs. + +The most straightforward way to download and index a genome is to use the ``genomepy`` tool, which is installed with GimmeMotifs. + +:: + + $ genomepy install hg38 --provider UCSC --annotation + +Here, the hg38 genome and accompanying gene annotation will be downloaded from UCSC to the directory ``~/.local/share/genomes/hg38``. +You can change this default location by editing the file ``~/.config/genomepy/genomepy.yaml`` and changing the following line: + +:: + + genomes_dir: /data/genomes + +If this file does not exist, you can generate it with ``genomepy config generate``.
+After downloading a genome with genomepy, you can use its name (e.g. ``hg38``) for gimme commands. + +.. I think this is outdated: + Adding gene annotation files + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Note: If you used the ``genomepy`` command, annotation will be included automatically. + + For some applications a gene file is used. This is a file containing gene + annotation in BED12 format. It should be located in the ``gene_dir``, + which is defined in the configuration file (see below). + The file needs to be named ``.bed``, so for instance ``hg19.bed``. -Configuration of MotifSampler -+++++++++++++++++++++++++++++ +MotifSampler +------------ If you want to use MotifSampler there is one more step that you'll have to take *after* installation of GimmeMotifs. For every organism, you will diff --git a/docs/reference.rst b/docs/reference.rst index 5024e2fa..ff54b897 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -1,4 +1,3 @@ - .. _`command-line`: Command-line reference @@ -421,10 +420,11 @@ The second option looks like this: chr2:186923973-186924173 0.430 -0.258 -1.164 -0.723 chrX:113834470-113834670 0.560 -0.036 -0.686 -0.692 -This is a tab-separated table, with a header describing the experiments. In case of sequencing data, such -as ChIP-seq, ATAC-seq or DNaseI seq, we recommend to use **log-transformed** read counts which are -**mean-centered per row**. For optimal results, it is recommended to normalize between experiments (columns) after - the log-transformatiion step, for instance by quantile normalization or scaling. +This is a tab-separated table, with a header describing the experiments. +In case of sequencing data, such as ChIP-seq, ATAC-seq or DNaseI seq, +we recommend using **log-transformed** read counts which are **mean-centered per row**. +For optimal results, it is recommended to normalize between experiments (columns) after the log-transformation step, +for instance by quantile normalization or scaling.
By default, ``gimme maelstrom`` will mean-center the input, disable this with ``--nocenter``. The second input format generally gives better results than the first one and would be the recommended format. @@ -859,7 +859,7 @@ Compare for instance an FPR of 1% vs an FPR of 5%. .. _`gimme_motif2factors`: Command: gimme motif2factors -------------------- +---------------------------- With motif2factors you can convert an existing motif database to a motif database for your species of interest. This conversion is done by orthology, which is not the ideal way to do this. When converting the original database to a database of your favourite critter, only the relations between motifs diff --git a/docs/release_checklist.md b/docs/release_checklist.md index 684e0cea..f1e2df6f 100644 --- a/docs/release_checklist.md +++ b/docs/release_checklist.md @@ -18,7 +18,7 @@ $ git flow release start ${new_version} ``` $ cd ${test_dir} # Not the gimmemotifs git directory -$ conda create -n testenv python=3 --file conda_env.txt +$ conda create -n testenv python=3 --file requirements.yaml $ conda activate testenv $ pip install -e git+https://github.com/simonvh/gimmemotifs.git@release/${version}#egg=gimmemotifs $ cd src/gimmemotifs diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 301984e3..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -sphinx_bootstrap_theme -configparser -diskcache -feather-format -xdg -numpydoc -pillow -jinja2 -pyyaml >= 3.10 -xxhash -six -statsmodels -tqdm diff --git a/gimmemotifs/background.py b/gimmemotifs/background.py index 52b9c13f..7c567e25 100644 --- a/gimmemotifs/background.py +++ b/gimmemotifs/background.py @@ -86,8 +86,7 @@ def create_background_file( if bg_type in ["promoter"]: # Gene definition - fname = Genome(genome).filename - gene_file = fname.replace(".fa", ".annotation.bed.gz") + gene_file = Genome(genome).annotation_bed_file if not gene_file: gene_file = os.path.join(config.get_gene_dir(), 
"{}.bed".format(genome)) diff --git a/gimmemotifs/cli.py b/gimmemotifs/cli.py index c7c4a16b..9a08db0d 100644 --- a/gimmemotifs/cli.py +++ b/gimmemotifs/cli.py @@ -739,7 +739,9 @@ def __call__(self, parser, ns, values, option): print( "Genome not found. Have you installed your genome with genomepy?" ) - print("See https://github.com/simonvh/genomepy for details.") + print( + "See https://github.com/vanheeringen-lab/genomepy for details." + ) print("Alternatively, you can specify a FASTA file.") exit(1) diff --git a/gimmemotifs/config.py b/gimmemotifs/config.py index ef450a20..419211fc 100644 --- a/gimmemotifs/config.py +++ b/gimmemotifs/config.py @@ -5,7 +5,6 @@ # distribution. """ Configuration for GimmeMotifs """ import configparser -import sysconfig import glob import sys import xdg @@ -15,6 +14,7 @@ import pkg_resources import inspect from gimmemotifs.shutils import which +from gimmemotifs import __path__ logger = logging.getLogger("gimme.config") @@ -31,67 +31,84 @@ CONFIG_DIR = os.path.join(xdg.XDG_CONFIG_HOME, "gimmemotifs") MOTIF_CLASSES = [ + "AMD", + "BioProspector", + "ChIPMunk", + "DREME", + "DiNAMO", + "GADEM", + "HMS", + "Homer", + "Improbizer", "MDmodule", "MEME", "MEMEW", - "DREME", - "Weeder", - "GADEM", "MotifSampler", - "Trawler", - "Improbizer", - "BioProspector", "Posmo", - "ChIPMunk", - "AMD", - "HMS", - "Homer", - "XXmotif", "ProSampler", - "Yamda", - "DiNAMO", "RPMCMC", + "Trawler", + "Weeder", + "XXmotif", + "Yamda", ] +def get_build_dir(): + """ + Returns the build directory if installed in editable mode + using `python setup.py build && pip install -e .` + + Returns None if installed regularly using `pip install .` + """ + root_dir = os.path.dirname(__path__[0]) + v = sys.version_info + glob_dir = os.path.join(root_dir, "build", f"lib*{v[0]}*{v[1]}*", "gimmemotifs") + results = glob.glob(glob_dir) + + if len(results) == 1: + return results[0] + + class MotifConfig(object): """Configuration object for the gimmemotifs module.""" + # Borg 
design pattern: all instances of this class will have the same attributes __shared_state = {} - prefix = sysconfig.get_config_var("prefix") - # Default config that is installed with GimmeMotifs default_config = pkg_resources.resource_filename( "gimmemotifs", "../data/cfg/gimmemotifs.default.cfg" ) - # - package_dir = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) + # If gimme is installed in editable mode, + # the motif discovery tools are installed in the build/ dir, + # else they are installed in environment's site-packages/ dir. + build_dir = get_build_dir() + if build_dir: + package_dir = build_dir + else: + package_dir = os.path.dirname( + os.path.abspath(inspect.getfile(inspect.currentframe())) + ) user_config = os.path.join(CONFIG_DIR, "gimmemotifs.cfg") - - config_dir = "share/gimmemotifs/gimmemotifs.cfg" configs = [user_config] config = None - TOOL_SECTION = "tools" - def __init__(self, use_config=""): + def __init__(self, use_config=None): self.__dict__ = self.__shared_state + self.config = configparser.ConfigParser() if use_config: - self.config = configparser.ConfigParser() cfg = self.config.read(use_config) - elif not self.config: - self.config = configparser.ConfigParser() + else: + cfg = self.config.read(self.configs) + if not cfg: + logger.info("No config found.") + self.create_default_config() cfg = self.config.read(self.configs) - if not cfg: - logger.info("No config found.") - self.create_default_config() - cfg = self.config.read(self.configs) - if not cfg: - raise ValueError("Configuration file not found," "could not create it!") + if not cfg: + raise ValueError("Configuration file not found," "could not create it!") self._upgrade_config() diff --git a/gimmemotifs/denovo.py b/gimmemotifs/denovo.py index 30688fe4..39fed669 100644 --- a/gimmemotifs/denovo.py +++ b/gimmemotifs/denovo.py @@ -15,10 +15,7 @@ peaks = "Gm12878.CTCF.top500.w200.fa" outdir = "CTCF.gimme" -params = { - "tools": "Homer,BioProspector", - 
"genome": "hg38", - } +params = {"tools": "Homer,BioProspector", "genome": "hg38"} motifs = gimme_motifs(peaks, outdir, params=params) """ diff --git a/gimmemotifs/maelstrom.py b/gimmemotifs/maelstrom.py index 02f46b7d..f66f9f92 100644 --- a/gimmemotifs/maelstrom.py +++ b/gimmemotifs/maelstrom.py @@ -13,7 +13,6 @@ """ import glob import os -import re import shutil import sys import logging @@ -28,8 +27,6 @@ from scipy.cluster.hierarchy import linkage, dendrogram from sklearn.cluster import FeatureAgglomeration -# from scipy.spatial.distance import correlation - # Plotting import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec @@ -327,13 +324,12 @@ def run_maelstrom( df.to_csv(os.path.join(outdir, "input.table.txt"), sep="\t") infile = os.path.join(outdir, "input.table.txt") - # Copy the motif informatuon + # Copy the motif information pfmfile = pfmfile_location(pfmfile) - if pfmfile: - shutil.copy2(pfmfile, outdir) - mapfile = re.sub(".p[fw]m$", ".motif2factors.txt", pfmfile) - if os.path.exists(mapfile): - shutil.copy2(mapfile, outdir) + shutil.copy2(pfmfile, outdir) + mapfile = f"{pfmfile[:-4]}.motif2factors.txt" + if os.path.exists(mapfile): + shutil.copy2(mapfile, outdir) # Create a file with the number of motif matches if count_table is None: @@ -409,7 +405,7 @@ def run_maelstrom( count_table = os.path.join(outdir, "motif.nr.count.txt.gz") counts.to_csv(count_table, sep="\t", compression="gzip") - m2f = pd.read_table(os.path.join(outdir, mapfile), comment="#") + m2f = pd.read_table(mapfile, comment="#") m2f = m2f.join(motif_map, on="Motif") m2f.loc[m2f["Motif"] != m2f["motif_nr"], "Curated"] = "N" m2f["Motif"] = m2f["motif_nr"] diff --git a/gimmemotifs/report.py b/gimmemotifs/report.py index 7aaad596..c16baa55 100644 --- a/gimmemotifs/report.py +++ b/gimmemotifs/report.py @@ -339,7 +339,7 @@ def to_precision_str(self, subset=None, precision=0, include_zero=True): subset = non_reducing_slice(subset) def precision_str(x, precision=precision): - 
if (include_zero or x > 0) and x <= 10 ** -precision: + if (include_zero or x > 0) and x <= 10**-precision: return f"<{10**-precision}" else: return f"{{0:.{precision}f}}".format(x) diff --git a/gimmemotifs/tools/__init__.py b/gimmemotifs/tools/__init__.py index 50b3823e..510b6ec6 100644 --- a/gimmemotifs/tools/__init__.py +++ b/gimmemotifs/tools/__init__.py @@ -30,31 +30,6 @@ from .rpmcmc import Rpmcmc -MOTIF_CLASSES = [ - "MDmodule", - "Meme", - "MemeW", - "Dreme", - "Weeder", - "Gadem", - "MotifSampler", - "Trawler", - "Improbizer", - "BioProspector", - "Posmo", - "ChIPMunk", - "Jaspar", - "Amd", - "Hms", - "Homer", - "XXmotif", - "ProSampler", - "YAMDA", - "DiNAMO", - "RPMCMC", -] - - def get_tool(name): """ Returns an instance of a specific tool. diff --git a/gimmemotifs/utils.py b/gimmemotifs/utils.py index 88bbc978..103eafeb 100644 --- a/gimmemotifs/utils.py +++ b/gimmemotifs/utils.py @@ -76,7 +76,8 @@ def narrowpeak_to_bed(inputfile, bedfile, size=0): f_out.write("{}\t{}\t{}\t{}\n".format(vals[0], start, end, vals[6])) -def pfmfile_location(infile): +def pfmfile_location(infile=None): + """Return the path to the pfmfile""" config = MotifConfig() if infile is None: @@ -87,19 +88,18 @@ def pfmfile_location(infile): "database specified in the config file." 
) - if isinstance(infile, str): + if not os.path.exists(infile): + motif_dir = config.get_motif_dir() + checkfile = os.path.join(motif_dir, infile) + if os.path.exists(checkfile): + infile = checkfile + else: + for ext in [".pfm", ".pwm"]: + if os.path.exists(checkfile + ext): + infile = checkfile + ext + break if not os.path.exists(infile): - motif_dir = config.get_motif_dir() - checkfile = os.path.join(motif_dir, infile) - if os.path.exists(checkfile): - infile = checkfile - else: - for ext in [".pfm", ".pwm"]: - if os.path.exists(checkfile + ext): - infile = checkfile + ext - break - if not os.path.exists(infile): - raise ValueError("Motif file {} not found".format(infile)) + raise FileNotFoundError(f"Motif file {infile} not found") return infile @@ -774,11 +774,10 @@ def check_genome(genome): is_genome : bool """ try: - Genome(genome) + Genome(genome, rebuild=False) return True - except Exception: - pass - return False + except FileNotFoundError: + return False def make_equal_length(a, b, pos, truncate=None, bg=None): diff --git a/readthedocs.yml b/readthedocs.yml index e458c84e..adc0873a 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,18 +1,25 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + # Required version: 2 +build: + os: "ubuntu-20.04" + tools: + python: "mambaforge-4.10" + conda: environment: .rtd-environment.yml - # Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/conf.py - -build: - image: latest - python: - version: 3.7 install: - - method: setuptools + - method: pip path: . 
+ +# Build documentation in the docs/ directory with Sphinx +# (builds locally using $ sphinx-build docs build +# requires dependencies from .rtd-environment.yml) +sphinx: + configuration: docs/conf.py diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6c5b15e9..00000000 --- a/requirements.txt +++ /dev/null @@ -1,20 +0,0 @@ -sphinx_bootstrap_theme -configparser -xdg -numpydoc -numpy >= 1.6.0 -scipy >= 0.9.0 -matplotlib >= 2 -jinja2 -pyarrow >= 0.16.0 -pyyaml >= 3.10 -pybedtools -statsmodels -scikit-learn -seaborn -pysam -xgboost -diskcache -xxhash -pillow -iteround diff --git a/requirements.yaml b/requirements.yaml new file mode 100644 index 00000000..f55d3b43 --- /dev/null +++ b/requirements.yaml @@ -0,0 +1,63 @@ +name: gimme +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - configparser + - conda-forge::diskcache + - conda-forge::feather-format + - bioconda::genomepy >=0.11.1 + - ipywidgets # Necessary for progress bar in Jupyter notebook + - conda-forge::iteround + - jinja2 + - bioconda::logomaker + - loguru + - matplotlib-base >=3.1.2 + - numpy >= 1.6.0 + - pandas >=1.0.3, <=1.1.5 # 1.3.5/1.4.2 are bugged + - pyarrow >=0.16.0 + - bioconda::pybedtools >=0.9.0 + - bioconda::pysam >=0.16 + - python >=3.8 + - python-xxhash + - conda-forge::qnorm >=0.8.1 + - scikit-learn >=0.23.2 + - scipy >=1.4.1 + - seaborn >=0.10.1 + - statsmodels + - tqdm >=4.46.1 + - xdg + - xgboost >=1.0.2 + +# motif discovery tools + - conda-forge::gcc # Necessary to install motif discovery tools + # AMD # installed from source + # BioProspector # installed from source + # ChIPMunk # installed from source + - bioconda::dinamo >=1.0 + # DREME + - bioconda::gadem >=1.3.1 + # HMS # installed from source + - bioconda::homer >=4.11 + # Improbizer # installed from source + # MDmodule # installed from source + - bioconda::meme >=5.4.1 + # MEMEW # installed from source + # MotifSampler # installed from source + # Posmo # installed from source + - 
bioconda::prosampler >=1.0 + # RPMCMC + - bioconda::trawler >=2.0 + - bioconda::weeder >=2.0 + - bioconda::xxmotif >=1.6 + - bioconda::yamda >=0.1.00e9c9d + +# development packages + - conda-forge::black=22.3.0 # same as in .pre-commit-config.yaml + - conda-forge::flake8=4.0.1 # same as in .pre-commit-config.yaml + - conda-forge::flake8-bugbear=22.4.25 # same as in .pre-commit-config.yaml + - conda-forge::isort=5.10.1 # same as in .pre-commit-config.yaml + - conda-forge::pre-commit + - conda-forge::pytest + - conda-forge::pytest-cov diff --git a/setup.py b/setup.py index f7714f68..fabbd7b6 100644 --- a/setup.py +++ b/setup.py @@ -19,10 +19,10 @@ module1 = Extension("gimmemotifs.c_metrics", sources=["gimmemotifs/c_metrics.c"]) MOTIF_BINS = { - "MDmodule": ["src/MDmodule/MDmodule"], + "AMD": ["src/AMD/AMD.bin"], "BioProspector": ["src/BioProspector/BioProspector"], + "MDmodule": ["src/MDmodule/MDmodule"], "Posmo": ["src/posmo/posmo", "src/posmo/clusterwd"], - "AMD": ["src/AMD/AMD.bin"], } @@ -94,21 +94,22 @@ def run(self): cmdclass["build_py"] = build_tools +version = versioneer.get_version() setup( name="gimmemotifs", - version=versioneer.get_version(), + version=version, long_description=long_description, long_description_content_type="text/markdown", description=DESCRIPTION, author="Simon van Heeringen", author_email="simon.vanheeringen@gmail.com", url="https://github.com/simonvh/gimmemotifs/", - download_url="https://github.com/simonvh/gimmemotifs/tarball/" - + versioneer.get_version(), + download_url="https://github.com/simonvh/gimmemotifs/tarball/" + version, license="MIT", packages=find_packages(), scripts=["scripts/gimme", "scripts/combine_peaks", "scripts/coverage_table"], + package_data={"gimmemotifs.data": ["data/"]}, include_package_data=True, ext_modules=[module1], cmdclass=cmdclass, @@ -118,36 +119,39 @@ def run(self): "License :: OSI Approved :: MIT License", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", - "Programming 
Language :: Python :: 2.7", "Programming Language :: Python :: 3.5", "Topic :: Scientific/Engineering :: Bio-Informatics", ], install_requires=[ - "biofluff", - "setuptools >= 0.7", - "numpy", - "scipy >= 0.9.0", - "matplotlib >= 2", + "setuptools >=0.7", + # copied from the requirements.yaml + # "gcc", + "configparser", + "diskcache", + "feather-format", + "genomepy >=0.11.1", + "ipywidgets", "iteround", "jinja2", - "pandas >= 1.1", - "pyarrow >= 0.16.0", - "pyyaml >= 3.10", - "pybedtools", - "statsmodels", - "scikit-learn", - "seaborn", - "pysam", - "xgboost >= 0.71", - "xdg", - "diskcache", - "xxhash", - "configparser", - "genomepy >= 0.8.3", - "tqdm", - "pillow", "logomaker", - "qnorm", "loguru", + "matplotlib >=3.1.2", + # "matplotlib-base >=3.1.2", + "numpy >=1.6.0", + "pandas >=1.0.3, <=1.1.5", # 1.3.5/1.4.2 are bugged + "pyarrow >=0.16.0", + "pybedtools >=0.9.0", + "pysam >=0.16", + # "python >=3.8", + "xxhash", + # "python-xxhash", + "qnorm >=0.8.1", + "scikit-learn >=0.23.2", + "scipy >=1.4.1", + "seaborn >=0.10.1", + "statsmodels", + "tqdm >=4.46.1", + "xdg", + "xgboost >=1.0.2", ], ) diff --git a/test/data/background/genome.fa.sizes b/test/data/background/genome.fa.sizes deleted file mode 100644 index f4350ce8..00000000 --- a/test/data/background/genome.fa.sizes +++ /dev/null @@ -1 +0,0 @@ -chr1 100000 diff --git a/test/data/background/genome.size b/test/data/background/genome.size deleted file mode 100644 index f4350ce8..00000000 --- a/test/data/background/genome.size +++ /dev/null @@ -1 +0,0 @@ -chr1 100000 diff --git a/test/data/genomes/README.txt b/test/data/genomes/README.txt deleted file mode 100644 index 76429e06..00000000 --- a/test/data/genomes/README.txt +++ /dev/null @@ -1 +0,0 @@ -cache downloaded genomes diff --git a/test/data/genomes/hg38sample/hg38sample.fa b/test/data/genomes/hg38sample.fa similarity index 100% rename from test/data/genomes/hg38sample/hg38sample.fa rename to test/data/genomes/hg38sample.fa diff --git 
a/test/data/genomes/hg38sample/hg38sample.fa.sizes b/test/data/genomes/hg38sample/hg38sample.fa.sizes deleted file mode 100644 index 12d3476b..00000000 --- a/test/data/genomes/hg38sample/hg38sample.fa.sizes +++ /dev/null @@ -1,3 +0,0 @@ -chr1 40000 -chr8 10000 -chr2 90000 diff --git a/test/data/maelstrom/input_table.txt b/test/data/maelstrom/input_table.txt new file mode 100644 index 00000000..fc951a1b --- /dev/null +++ b/test/data/maelstrom/input_table.txt @@ -0,0 +1,5 @@ +enhancer cluster +chr1:1-2 1 +chr1:3-5 3 +chr1:100-200 4 +chr1:500-600 3 \ No newline at end of file diff --git a/test/test_cli.py b/test/test_cli.py index 1a24892b..99b4b5e5 100755 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -9,31 +9,38 @@ travis = "TRAVIS" in os.environ and os.environ["TRAVIS"] == "true" -@pytest.mark.skipif(travis, reason="Skip CPU-intensive tests") +# @pytest.mark.skipif(travis, reason="Skip CPU-intensive tests") @pytest.mark.parametrize( - "denovo_known", + "motif_argument", [ - ["-p", "test/data/cli/motifs.pfm", "-t", "MDmodule"], - ["--denovo", "-t", "MDmodule"], - ["--known", "-p", "test/data/cli/motifs.pfm"], + pytest.param(["--known"], id="known"), + pytest.param(["--denovo"], id="denovo"), + pytest.param([], id="default"), ], ) -def test_gimme_motifs(denovo_known): +def test_gimme_motifs(motif_argument): with TemporaryDirectory() as d: print(d) cli( - ["motifs", "test/data/cli/Gm12878.CTCF.top200.fa", d, "-g", "hg19"] - + denovo_known + ["motifs", "test/data/denovo/input.fa", d] + # test/data/cli/Gm12878.CTCF.top200.fa + ["-p", "test/data/cli/motifs.pfm"] + + ["-g", "test/data/background/genome.fa"] + + ["-a", "small", "-t", "MEME", "--nogc", "-N", "1"] + + motif_argument ) assert 1 == 1 -@pytest.mark.skipif(travis, reason="Skip CPU-intensive tests") +@pytest.mark.skipif(travis, reason="Can cause a seg fault on Travis") def test_gimme_maelstrom(): with TemporaryDirectory() as d: print(d) - cli(["maelstrom", "test/data/maelstrom/input.table.txt", "mm10", d]) + 
cli( + ["maelstrom", "test/data/maelstrom/input_table.txt"] + # test/data/maelstrom/input.table.txt + ["test/data/background/genome.fa", d] + + ["--nogc", "-m", "RF"] # "--no-filter", + ) assert 1 == 1 @@ -42,10 +49,10 @@ def test_gimme_maelstrom(): "arguments", [ ["-c", "0.8"], - ["-t", "-g", "test/data/genomes/hg38sample/hg38sample.fa"], + ["-t", "-g", "test/data/genomes/hg38sample.fa"], ["-T"], - ["-b", "-g" "test/data/genomes/hg38sample/hg38sample.fa"], - ["-z", "--gc", "-g", "test/data/genomes/hg38sample/hg38sample.fa"], + ["-b", "-g" "test/data/genomes/hg38sample.fa"], + ["-z", "--gc", "-g", "test/data/genomes/hg38sample.fa"], ], ) def test_gimme_scan(arguments): diff --git a/test/test_comparison.py b/test/test_comparison.py index 4be158d2..9fb82acd 100644 --- a/test/test_comparison.py +++ b/test/test_comparison.py @@ -58,7 +58,7 @@ def test_motif_comparer(): assert "GM.5.0.TBP.0001" == match[0] scores = match[1] - np.testing.assert_almost_equal(-0.1314, scores[0], 4) + np.testing.assert_almost_equal(scores[0], -0.1045, 4) assert 0 == scores[1] assert 1 == scores[2] - np.testing.assert_almost_equal(3.1666e-8, scores[3]) + np.testing.assert_almost_equal(scores[3], 3.1666e-8) diff --git a/test/test_motif.py b/test/test_motif.py index 964b4a43..fb7cd857 100644 --- a/test/test_motif.py +++ b/test/test_motif.py @@ -256,14 +256,14 @@ def test_read_motifs_xxmotifs(): assert "RGGCAWGYC" == motifs[-1].to_consensus().upper() -def test6_pcc(): - pfm1 = [[5, 0, 0, 0], [0, 5, 0, 0], [0, 5, 0, 0], [0, 0, 0, 5]] - pfm2 = [[5, 0, 0, 0], [0, 5, 0, 0], [0, 5, 0, 0], [0, 0, 0, 5]] +def test_pcc(): + pfm1 = [[5, 0, 0, 0], [0, 5, 0, 0], [0, 0, 5, 0], [0, 0, 0, 5]] + pfm2 = [[5, 0, 0, 0], [0, 5, 0, 0], [0, 0, 5, 0], [0, 0, 0, 5]] m1 = Motif(pfm1) m2 = Motif(pfm2) - assert 4 == m1.max_pcc(m2)[0] + assert 4 == round(m1.max_pcc(m2)[0], 0) def test_add_operator(): diff --git a/test/test_tools.py b/test/test_tools.py index 9903ce54..13ce1d76 100644 --- a/test/test_tools.py +++ 
b/test/test_tools.py @@ -24,29 +24,28 @@ def ap1_included(motifs): @pytest.mark.parametrize("tool_name", __tools__) def test_tool(tool_name): """Test motif prediction tools.""" - params = {"background": bg_fa, "organism": "hg38", "width": 7} - print(__tools__) - if tool_name in [ - "yamda", # not installable vio bioconda - "rpmcmc", # not installable via bioconda - "gadem", # sometimes crashes on invalid pointer - "jaspar", - "xxmotif", # takes too long - "trawler", # unpredictable, sometimes doesn't find the motif - "weeder", # doesn't work at the moment - "posmo", # motif doesn't predictably look like AP1 - "dreme", # current dreme in bioconda is broken - ]: - return + blacklist = { + "dreme": "not installable via bioconda/source", + "gadem": "sometimes crashes on invalid pointer", + "jaspar": "# TODO: not configured", # TODO + "posmo": "motif doesn't predictably look like AP1", + "rpmcmc": "not installable via bioconda/source", + "trawler": "unpredictable, sometimes doesn't find the motif", + "xxmotif": "takes too long", + "yamda": "# TODO: not configured", # TODO + } + if tool_name in blacklist: + pytest.skip(blacklist[tool_name]) if platform.system() == "Darwin": - # No support for osx - if tool_name in ["amd", "hms", "improbizer", "motifsampler", "dinamo"]: - return + if tool_name in ["amd", "dinamo", "hms", "improbizer", "motifsampler"]: + pytest.skip("No supported for osx") + print("Tool class:", __tools__[tool_name]) t = get_tool(tool_name) print("Testing {}...".format(t)) + params = {"background": bg_fa, "organism": "hg38", "width": 7} (motifs, stderr, stdout) = t.run(fa, params) print(motifs) print(stderr) diff --git a/test/test_utils.py b/test/test_utils.py index 8c1c72d3..b37be25a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,7 +1,9 @@ import unittest -import tempfile import os import glob + +import pytest + from gimmemotifs.utils import * from gimmemotifs.fasta import Fasta from genomepy import Genome @@ -112,12 +114,34 @@ def 
test_median_bed_len(self): def test_check_genome(self): fname = "test/data/genome_index/test.bed" - self.assertFalse(check_genome(fname)) + with pytest.raises(Exception): + check_genome(fname) fname = "test/data/genome_index/genome/genome.fa" self.assertTrue(check_genome(fname)) # narrowpeak_to_bed + def test_pfmfile_location(self): + # default pfm + pfmfile = pfmfile_location() + assert os.path.exists(pfmfile) + + # pfmfile in the default db + pfmfile = pfmfile_location("HOMER.pfm") + assert os.path.exists(pfmfile) + + # pfmfile prefix + pfmfile = pfmfile_location("HOMER") + assert pfmfile.endswith("data/motif_databases/HOMER.pfm") + + # pfmfile with path + pfmfile = pfmfile_location("test/data/pwms/motifs.pwm") + assert os.path.exists(pfmfile) + + # incorrect file + with pytest.raises(FileNotFoundError): + pfmfile_location("not_a_file") + # get_jaspar_motif_info # divide_file