From 5080c4d250c237fb36d66ba38baaaf4ce4071f88 Mon Sep 17 00:00:00 2001
From: cgpu
Date: Sat, 7 Nov 2020 20:38:31 +0000
Subject: [PATCH 01/36] Adds nf-core template for nextflow pipelines

---
 CHANGELOG.md                                |  16 +
 CODE_OF_CONDUCT.md                          |  46 +++
 Dockerfile                                  |  17 +
 LICENSE_nextflow                            |  21 +
 README_nextflow.md                          |  69 ++++
 assets/email_template.html                  |  54 +++
 assets/email_template.txt                   |  40 ++
 assets/multiqc_config.yaml                  |  11 +
 assets/nf-core-proteogenomics_logo.png      | Bin 0 -> 10806 bytes
 assets/sendmail_template.txt                |  53 +++
 conf/base.config                            |  51 +++
 conf/igenomes.config                        | 421 +++++++++++++++++++
 conf/test.config                            |  26 ++
 conf/test_full.config                       |  22 +
 docs/README.md                              |  10 +
 docs/images/nf-core-proteogenomics_logo.png | Bin 0 -> 18240 bytes
 docs/output.md                              |  63 +++
 docs/usage.md                               | 128 ++++++
 environment.yml                             |  15 +
 main.nf                                     | 435 ++++++++++++++++++++
 nextflow.config                             | 153 +++++++
 nextflow_schema.json                        | 259 ++++++++++++
 22 files changed, 1910 insertions(+)
 create mode 100644 CHANGELOG.md
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 Dockerfile
 create mode 100644 LICENSE_nextflow
 create mode 100644 README_nextflow.md
 create mode 100644 assets/email_template.html
 create mode 100644 assets/email_template.txt
 create mode 100644 assets/multiqc_config.yaml
 create mode 100644 assets/nf-core-proteogenomics_logo.png
 create mode 100644 assets/sendmail_template.txt
 create mode 100644 conf/base.config
 create mode 100644 conf/igenomes.config
 create mode 100644 conf/test.config
 create mode 100644 conf/test_full.config
 create mode 100644 docs/README.md
 create mode 100644 docs/images/nf-core-proteogenomics_logo.png
 create mode 100644 docs/output.md
 create mode 100644 docs/usage.md
 create mode 100644 environment.yml
 create mode 100644 main.nf
 create mode 100644 nextflow.config
 create mode 100644 nextflow_schema.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..0229b88
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,16 @@
+# nf-core/proteogenomics: Changelog
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## v1.0dev - [date]
+
+Initial release of nf-core/proteogenomics, created with the [nf-core](https://nf-co.re/) template.
+
+### `Added`
+
+### `Fixed`
+
+### `Dependencies`
+
+### `Deprecated`
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..405fb1b
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,46 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct/][version]
+
+[homepage]: https://contributor-covenant.org
+[version]: https://www.contributor-covenant.org/version/1/4/code-of-conduct/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..883a854
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM nfcore/base:1.11
+LABEL authors="Sheynkman Group and Smith Group" \
+      description="Docker image containing all software requirements for the nf-core/proteogenomics pipeline"
+
+# Install the conda environment
+COPY environment.yml /
+RUN conda env create --quiet -f /environment.yml && conda clean -a
+
+# Add conda installation dir to PATH (instead of doing 'conda activate')
+ENV PATH /opt/conda/envs/nf-core-proteogenomics-1.0dev/bin:$PATH
+
+# Dump the details of the installed packages to a file for posterity
+RUN conda env export --name nf-core-proteogenomics-1.0dev > nf-core-proteogenomics-1.0dev.yml
+
+# Instruct R processes to use these empty files instead of clashing with a local version
+RUN touch .Rprofile
+RUN touch .Renviron
diff --git a/LICENSE_nextflow b/LICENSE_nextflow
new file mode 100644
index 0000000..25c119f
--- /dev/null
+++ b/LICENSE_nextflow
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Sheynkman Group and Smith Group
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README_nextflow.md b/README_nextflow.md
new file mode 100644
index 0000000..1f88418
--- /dev/null
+++ b/README_nextflow.md
@@ -0,0 +1,69 @@
+# ![nf-core/proteogenomics](docs/images/nf-core-proteogenomics_logo.png)
+
+**A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteogenomics**.
+
+[![GitHub Actions CI Status](https://github.com/nf-core/proteogenomics/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/proteogenomics/actions)
+[![GitHub Actions Linting Status](https://github.com/nf-core/proteogenomics/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/proteogenomics/actions)
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/)
+
+[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/)
+[![Docker](https://img.shields.io/docker/automated/nfcore/proteogenomics.svg)](https://hub.docker.com/r/nfcore/proteogenomics)
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteogenomics-4A154B?logo=slack)](https://nfcore.slack.com/channels/proteogenomics)
+
+## Introduction
+
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible.
+
+## Quick Start
+
+1. Install [`nextflow`](https://nf-co.re/usage/installation)
+
+2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
+
+3. Download the pipeline and test it on a minimal dataset with a single command:
+
+    ```bash
+    nextflow run nf-core/proteogenomics -profile test,<docker/singularity/podman/conda/institute>
+    ```
+
+    > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+
+4. Start running your own analysis!
+
+    ```bash
+    nextflow run nf-core/proteogenomics -profile <docker/singularity/podman/conda/institute> --input '*_R{1,2}.fastq.gz' --genome GRCh37
+    ```
+
+See [usage docs](https://nf-co.re/proteogenomics/usage) for all of the available options when running the pipeline.
+
+## Documentation
+
+The nf-core/proteogenomics pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/proteogenomics/usage) and [output](https://nf-co.re/proteogenomics/output).
+
+## Credits
+
+nf-core/proteogenomics was originally written by the Sheynkman Group and Smith Group.
+
+## Contributions and Support
+
+If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
+
+For further information or help, don't hesitate to get in touch on the [Slack `#proteogenomics` channel](https://nfcore.slack.com/channels/proteogenomics) (you can join with [this invite](https://nf-co.re/join/slack)).
+
+## Citation
+
+You can cite the `nf-core` publication as follows:
+
+> **The nf-core framework for community-curated bioinformatics pipelines.**
+>
+> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
+>
+> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
+> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ)
diff --git a/assets/email_template.html b/assets/email_template.html
new file mode 100644
index 0000000..8e885e7
--- /dev/null
+++ b/assets/email_template.html
@@ -0,0 +1,54 @@
+<html>
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>nf-core/proteogenomics Pipeline Report</title>
+</head>
+<body>
+<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
+
+<img src="cid:nfcorepipelinelogo">
+
+<h1>nf-core/proteogenomics v${version}</h1>
+<h2>Run Name: $runName</h2>
+
+<% if (!success){
+    out << """
+    <div style="color: #a94442; background-color: #f2dede; border-color: #ebccd1; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
+        <h4 style="margin-top:0; color: inherit;">nf-core/proteogenomics execution completed unsuccessfully!</h4>
+        <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
+        <p>The full error message was:</p>
+        <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">${errorReport}</pre>
+    </div>
+    """
+} else {
+    out << """
+    <div style="color: #3c763d; background-color: #dff0d8; border-color: #d6e9c6; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
+        nf-core/proteogenomics execution completed successfully!
+    </div>
+    """
+}
+%>
+
+<p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
+<p>The command used to launch the workflow was as follows:</p>
+<pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">$commandLine</pre>
+
+<h3>Pipeline Configuration:</h3>
+<table style="width:100%; max-width:100%; border-spacing: 0; border-collapse: collapse; border: 0; margin-bottom: 30px;">
+    <% out << summary.collect{ k,v -> "<tr><th style='text-align:left; padding: 8px 0; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'>$k</th><td style='text-align:left; padding: 8px; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>$v</pre></td></tr>" }.join("\n") %>
+</table>
+
+<p>nf-core/proteogenomics</p>
+<p><a href="https://github.com/nf-core/proteogenomics">https://github.com/nf-core/proteogenomics</a></p>
+
+</div>
+
+</body>
+</html>
diff --git a/assets/email_template.txt b/assets/email_template.txt
new file mode 100644
index 0000000..c38b428
--- /dev/null
+++ b/assets/email_template.txt
@@ -0,0 +1,40 @@
+----------------------------------------------------
+                                        ,--./,-.
+        ___     __   __   __   ___     /,-._.--~\\
+  |\\ | |__  __ /  ` /  \\ |__) |__         }  {
+  | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
+                                        `._,._,'
+  nf-core/proteogenomics v${version}
+----------------------------------------------------
+
+Run Name: $runName
+
+<% if (success){
+    out << "## nf-core/proteogenomics execution completed successfully! ##"
+} else {
+    out << """####################################################
+## nf-core/proteogenomics execution completed unsuccessfully! ##
+####################################################
+The exit status of the task that caused the workflow execution to fail was: $exitStatus.
+The full error message was:
+
+${errorReport}
+"""
+} %>
+
+
+The workflow was completed at $dateComplete (duration: $duration)
+
+The command used to launch the workflow was as follows:
+
+  $commandLine
+
+
+
+Pipeline Configuration:
+-----------------------
+<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>
+
+--
+nf-core/proteogenomics
+https://github.com/nf-core/proteogenomics
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
new file mode 100644
index 0000000..02f0a9e
--- /dev/null
+++ b/assets/multiqc_config.yaml
@@ -0,0 +1,11 @@
+report_comment: >
+    This report has been generated by the <a href="https://github.com/nf-core/proteogenomics" target="_blank">nf-core/proteogenomics</a>
+    analysis pipeline. For information about how to interpret these results, please see the
+    <a href="https://github.com/nf-core/proteogenomics/blob/master/docs/output.md" target="_blank">documentation</a>.
+report_section_order:
+    software_versions:
+        order: -1000
+    nf-core-proteogenomics-summary:
+        order: -1001
+
+export_plots: true
diff --git a/assets/nf-core-proteogenomics_logo.png b/assets/nf-core-proteogenomics_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..138594026753e7e9779e06bb7b1abe8b443f34a4
GIT binary patch
[binary image data omitted: assets/nf-core-proteogenomics_logo.png, literal 10806 bytes]
diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt
new file mode 100644
index 0000000..7834b69
--- /dev/null
+++ b/assets/sendmail_template.txt
@@ -0,0 +1,53 @@
+To: $email
+Subject: $subject
+Mime-Version: 1.0
+Content-Type: multipart/related;boundary="nfcoremimeboundary"
+
+--nfcoremimeboundary
+Content-Type: text/html; charset=utf-8
+
+$email_html
+
+--nfcoremimeboundary
+Content-Type: image/png;name="nf-core-proteogenomics_logo.png"
+Content-Transfer-Encoding: base64
+Content-ID: <nfcorepipelinelogo>
+Content-Disposition: inline; filename="nf-core-proteogenomics_logo.png"
+
+<% out << new File("$baseDir/assets/nf-core-proteogenomics_logo.png").
+    bytes.
+    encodeBase64().
+    toString().
+    tokenize( '\n' )*.
+    toList()*.
+    collate( 76 )*.
+    collect { it.join() }.
+    flatten().
+    join( '\n' ) %>
+
+<%
+if (mqcFile){
+def mqcFileObj = new File("$mqcFile")
+if (mqcFileObj.length() < mqcMaxSize){
+out << """
+--nfcoremimeboundary
+Content-Type: text/html; name=\"multiqc_report\"
+Content-Transfer-Encoding: base64
+Content-ID: <mqcreport>
+Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\"
+
+${mqcFileObj.
+    bytes.
+    encodeBase64().
+    toString().
+    tokenize( '\n' )*.
+    toList()*.
+    collate( 76 )*.
+    collect { it.join() }.
+    flatten().
+    join( '\n' )}
+"""
+}}
+%>
+
+--nfcoremimeboundary--
diff --git a/conf/base.config b/conf/base.config
new file mode 100644
index 0000000..c81f6bd
--- /dev/null
+++ b/conf/base.config
@@ -0,0 +1,51 @@
+/*
+ * -------------------------------------------------
+ *  nf-core/proteogenomics Nextflow base config file
+ * -------------------------------------------------
+ * A 'blank slate' config file, appropriate for general
+ * use on most high performance compute environments.
+ * Assumes that all software is installed and available
+ * on the PATH. Runs in `local` mode - all jobs will be
+ * run on the logged in environment.
+ */
+
+process {
+
+  // TODO nf-core: Check the defaults for all processes
+  cpus = { check_max( 1 * task.attempt, 'cpus' ) }
+  memory = { check_max( 7.GB * task.attempt, 'memory' ) }
+  time = { check_max( 4.h * task.attempt, 'time' ) }
+
+  errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+  maxRetries = 1
+  maxErrors = '-1'
+
+  // Process-specific resource requirements
+  // NOTE - Only one of the labels below is used in the fastqc process in the main script.
+  //        If possible, it would be nice to keep the same label naming convention when
+  //        adding in your processes.
+  // TODO nf-core: Customise requirements for specific processes.
+  // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+  withLabel:process_low {
+    cpus = { check_max( 2 * task.attempt, 'cpus' ) }
+    memory = { check_max( 14.GB * task.attempt, 'memory' ) }
+    time = { check_max( 6.h * task.attempt, 'time' ) }
+  }
+  withLabel:process_medium {
+    cpus = { check_max( 6 * task.attempt, 'cpus' ) }
+    memory = { check_max( 42.GB * task.attempt, 'memory' ) }
+    time = { check_max( 8.h * task.attempt, 'time' ) }
+  }
+  withLabel:process_high {
+    cpus = { check_max( 12 * task.attempt, 'cpus' ) }
+    memory = { check_max( 84.GB * task.attempt, 'memory' ) }
+    time = { check_max( 10.h * task.attempt, 'time' ) }
+  }
+  withLabel:process_long {
+    time = { check_max( 20.h * task.attempt, 'time' ) }
+  }
+  withName:get_software_versions {
+    cache = false
+  }
+
+}
diff --git a/conf/igenomes.config b/conf/igenomes.config
new file mode 100644
index 0000000..caeafce
--- /dev/null
+++ b/conf/igenomes.config
@@ -0,0 +1,421 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for iGenomes paths
+ * -------------------------------------------------
+ * Defines reference genomes, using iGenome paths
+ * Can be used by any config that customises the base
+ * path using $params.igenomes_base / --igenomes_base
+ */
+
+params {
+  // illumina iGenomes reference file paths
+  genomes {
+    'GRCh37' {
+      fasta       = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
+      bwa         = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa"
+      bowtie2     = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
+      star        = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
+      bismark     = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/"
+      gtf         = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
+      bed12       = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
+      readme      = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt"
+      mito_name   = "MT"
+      macs_gsize  = "2.7e9"
+      blacklist   = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed"
+    }
+    'GRCh38' {
+      fasta       =
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 0000000..500a79c --- /dev/null +++ b/conf/test.config @@ -0,0 +1,26 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run nf-core/proteogenomics -profile test, + */ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 48.h + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + single_end = false + input_paths = [ + ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], + ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] + ] +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 0000000..08642ae --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,22 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running full-size tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a full size pipeline test. Use as follows: + * nextflow run nf-core/proteogenomics -profile test_full, + */ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + single_end = false + input_paths = [ + ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], + ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] + ] +} diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..4077e14 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,10 @@ +# nf-core/proteogenomics: Documentation + +The nf-core/proteogenomics documentation is split into the following pages: + +* [Usage](usage.md) + * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +* [Output](output.md) + * An overview of the different results produced by the pipeline and how to interpret them. 
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..4077e14
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,10 @@
+# nf-core/proteogenomics: Documentation
+
+The nf-core/proteogenomics documentation is split into the following pages:
+
+* [Usage](usage.md)
+  * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
+* [Output](output.md)
+  * An overview of the different results produced by the pipeline and how to interpret them.
+
+You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
diff --git a/docs/images/nf-core-proteogenomics_logo.png b/docs/images/nf-core-proteogenomics_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..39f525f756651f06102019189d9ae1a9961d4118
GIT binary patch
literal 18240
[base64-encoded PNG logo data omitted]
z$)B}$I^uo1!%FXs_Aj5zo9NEQ1^@R@7y8T2urmv!Yb{~Bsf7zN3I%|>T^E`S)^3!% zrdD9q35*?=GittwTLbaKyk7Dh8ZD0Fm!?6A6kA*&T7j8%J%f6bc542%J?Opvr<6PW zW;+Yx0A7_cu|x;03P!flIgOGsRU=X)8In>{)8!a@Efq_ReUCKuqKVi=YqW+s8jYo+ zNNAOzr3>1qicr!T$%#-)&~1LzKVjb77x$j?oO|Eg7x(kM4{}b@2FBUaQBv6pU6Z$) z_bFEXlzdr*Kk9Tqlb|vMlxQ|qd!{O18bbwVh);fttfXWOy%QBEEj*uTJ4B53(k z1Ka3E(PDzs$71Y?x9!Yk8rGZNqN6jaNm4s%RHq8bb#&w>3u~CDHHIY8@l@rhbfg}i zA03Hd;ghe+9WJPrZhI*6p!aGYtqW;&J2 zhvfHnYCohnrGqw9dI$l=2;7|aBwPHDUiuhYD9A?}Z$ZqR`W-C2_=k%o-ASG5Eph`Z z0lNyHDI2D^;4uYjUT!zG5u;ICNCcW~Ly#bSZkHpA@l~fMLWQiscw2+L@-46xby{$6 zD7OzBOLyYM7{KOt-?7>UCfy$DeZGtGg^R+x49{avRw+@`hF*5dS!#Gt*g+{OVm6up|RAe z9ZJd)yVtFV)H|aX17;5bb&V;zsF&+7o0{uds~kKwLJ#4%W`^>;bm8FKo_mJ+Dv>B0 zf)xo8fq4eN{bL)DUnEb|*;j-kPq% zwsZaX`lNxCBY{cvC`)rES_;!Jpu>G8tpwkbQXPiLO9(%>!r(Mj_@3fD^%X{U?41!L zJ4Fc)Cw~C0DbGkEYo*IatEnh)S`Js7<;c6hD?>FoXc2XH>rwpJ?cEHx<(^*RJkj&4 zi@M~M@}oBG90rU$i6`_w$Sn{0&W4*YM0x@*99IiOf0-D1$lXV8m`)tZ*s>SD8fmCF zju`>7h*t-o;y0@nM+3L_a?}rLmRH!AaN~Y1-IE5fVVrPcr5+2#^7I}Zj)%>?smQ^` zeo(fJc1GU)fnt9Swsm(uvc*|Sai0yKeO-dN?A?+)q#JV{E^~M@Oe~ z@XcAha9N+WB&=i1A`lld6EP+SyGb`*e<-MUfGIBzL=5rQSaU>b_rwPS=^E-tD4zdh&ZKuQ7)XSwp;s^BajiEQw1 zJ1l{C+C+tdq8>gn%nr-}COy9@nA-h`qO9~{wC|ZbEuHj5lbUxfT|je_%I6^(WjA7R z41{8Ke5kKsusD2wFw`UDWzgvR-kaC#fLaHjfESWW^qrHl6A;(`a-z*3+oH-~_#^{n zr6X=ETLWf^@YUoQDb{qXEgUXx9=g){=?0)%C83GVmm2MceV)0yD2nQP{0GpiX8?Ky zIqulk`D9XAY^(P9Kb4jM&$72#0zO*Dw^R$vwVMHJ(%#7;K(dR_U!+qJxxgtbcjUOq zLyHIYDEsAWnXxMe&^4R<7e&EPV0~%}xeSqbGJ#$mLqm0Ry}Zq&CRX?t zkjQPGKEH2=sk-RO%z`C`a*{;v>=dp0a`ELJK^l-=D%l8l9YF(BDo1rhWk14(8?W>Z zMsjXY`t$%-J7|-GGWO0N(eTm`Y6(?uht*C4DrS`NmC0Dl`a|5-yYb%;?VH~Hzj5=+ Y>&Kw?rr=C6&=O}CiFk_8?C3}T4{bb-JOBUy literal 0 HcmV?d00001 diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 0000000..1342ddc --- /dev/null +++ b/docs/output.md @@ -0,0 +1,63 @@ +# nf-core/proteogenomics: Output + +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteogenomics/output](https://nf-co.re/proteogenomics/output) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + + + +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) +and processes data using the following steps: + +* [FastQC](#fastqc) - Read quality control +* [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline +* [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +## FastQC + +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. + +For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +**Output files:** + +* `fastqc/` + * `*_fastqc.html`: FastQC report containing quality metrics for your untrimmed raw fastq files. 
+
+* `fastqc/zips/`
+  * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+
+> **NB:** The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
+
+## MultiQC
+
+[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.
+
+The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability.
+
+For more information about how to use MultiQC reports, see [https://multiqc.info](https://multiqc.info).
+
+**Output files:**
+
+* `multiqc/`
+  * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
+  * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
+  * `multiqc_plots/`: directory containing static images from the report in various formats.
+
+## Pipeline information
+
+[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
+
+**Output files:**
+
+* `pipeline_info/`
+  * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`.
+  * Documentation for interpretation of results in HTML format: `results_description.html`.
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 0000000..34552e1
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,128 @@
+# nf-core/proteogenomics: Usage
+
+## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteogenomics/usage](https://nf-co.re/proteogenomics/usage)
+
+> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._
+
+## Introduction
+
+## Running the pipeline
+
+The typical command for running the pipeline is as follows:
+
+```bash
+nextflow run nf-core/proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker
+```
+
+This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
+
+Note that the pipeline will create the following files in your working directory:
+
+```bash
+work            # Directory containing the nextflow working files
+results         # Finished results (configurable, see below)
+.nextflow.log   # Log file from Nextflow
+# Other nextflow hidden files, e.g. history of pipeline runs and old logs.
+```
+
+### Updating the pipeline
+
+When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since.
To make sure that you're running the latest version of the pipeline, regularly update the cached version:
+
+```bash
+nextflow pull nf-core/proteogenomics
+```
+
+### Reproducibility
+
+It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
+
+First, go to the [nf-core/proteogenomics releases page](https://github.com/nf-core/proteogenomics/releases) and find the latest version number - numeric only (e.g. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - e.g. `-r 1.3.1`.
+
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+
+## Core Nextflow arguments
+
+> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+
+### `-profile`
+
+Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
+
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Conda) - see below.
+
+> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
+
+The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).
+
+Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
+They are loaded in sequence, so later profiles can overwrite earlier profiles.
+
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+
+* `docker`
+  * A generic configuration profile to be used with [Docker](https://docker.com/)
+  * Pulls software from Docker Hub: [`nfcore/proteogenomics`](https://hub.docker.com/r/nfcore/proteogenomics/)
+* `singularity`
+  * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
+  * Pulls software from Docker Hub: [`nfcore/proteogenomics`](https://hub.docker.com/r/nfcore/proteogenomics/)
+* `podman`
+  * A generic configuration profile to be used with [Podman](https://podman.io/)
+  * Pulls software from Docker Hub: [`nfcore/proteogenomics`](https://hub.docker.com/r/nfcore/proteogenomics/)
+* `conda`
+  * Please only use Conda as a last resort, i.e. when it's not possible to run the pipeline with Docker, Singularity or Podman.
+  * A generic configuration profile to be used with [Conda](https://conda.io/docs/)
+  * Pulls most software from [Bioconda](https://bioconda.github.io/)
+* `test`
+  * A profile with a complete configuration for automated testing
+  * Includes links to test data so needs no other parameters
+
+### `-resume`
+
+Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously.
+
+You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names.
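+
+For example, a resumed run simply repeats the original command with `-resume` appended (the input pattern here is illustrative):
+
+```bash
+nextflow run nf-core/proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker -resume
+```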
+
+### `-c`
+
+Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information.
+
+#### Custom resource requests
+
+Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped.
+
+Whilst these default requirements will hopefully work for most people with most data, you may find that you want to customise the compute resources that the pipeline requests. You can do this by creating a custom config file. For example, to give the workflow process `star` 32GB of memory, you could use the following config:
+
+```nextflow
+process {
+    withName: star {
+        memory = 32.GB
+    }
+}
+```
+
+See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information.
+
+If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter (see definition above). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile.
+
+If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
+
+### Running in the background
+
+Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
+
+The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file.
+
+Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time.
+Some HPC setups also allow you to run Nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs).
+
+#### Nextflow memory requirements
+
+In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):
+
+```bash
+NXF_OPTS='-Xms1g -Xmx4g'
+```
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..a588315
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,15 @@
+# You can use this file to create a conda environment for this pipeline:
+# conda env create -f environment.yml
+name: nf-core-proteogenomics-1.0dev
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::python=3.7.3
+  - conda-forge::markdown=3.1.1
+  - conda-forge::pymdown-extensions=6.0
+  - conda-forge::pygments=2.5.2
+  # TODO nf-core: Add required software dependencies here
+  - bioconda::fastqc=0.11.8
+  - bioconda::multiqc=1.7
diff --git a/main.nf b/main.nf
new file mode 100644
index 0000000..b2e94a9
--- /dev/null
+++ b/main.nf
@@ -0,0 +1,435 @@
+#!/usr/bin/env nextflow
+/*
+========================================================================================
+                         nf-core/proteogenomics
+========================================================================================
+ nf-core/proteogenomics Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/nf-core/proteogenomics
+----------------------------------------------------------------------------------------
+*/
+
+def helpMessage() {
+    // TODO nf-core: Add to this help message with new command line parameters
+    log.info nfcoreHeader()
+    log.info"""
+
+    Usage:
+
+    The typical command for running the pipeline is as follows:
+
+    nextflow run nf-core/proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker
+
+    Mandatory arguments:
+      --input [file]                  Path to input data (must be surrounded with quotes)
+      -profile [str]                  Configuration profile to use. Can use multiple (comma separated)
+                                      Available: conda, docker, singularity, test, awsbatch, and more
+
+    Options:
+      --genome [str]                  Name of iGenomes reference
+      --single_end [bool]             Specifies that the input is single-end reads
+
+    References                        If not specified in the configuration file or you wish to overwrite any of the references
+      --fasta [file]                  Path to fasta reference
+
+    Other options:
+      --outdir [file]                 The output directory where the results will be saved
+      --publish_dir_mode [str]        Mode for publishing results in the output directory. Available: symlink, rellink, link, copy, copyNoFollow, move (Default: copy)
+      --email [email]                 Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
+      --email_on_fail [email]         Same as --email, except only send mail if the workflow is not successful
+      --max_multiqc_email_size [str]  Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
+      -name [str]                     Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic
+
+    AWSBatch options:
+      --awsqueue [str]                The AWSBatch JobQueue that needs to be set when running on AWSBatch
+      --awsregion [str]               The AWS Region for your AWS Batch job to run on
+      --awscli [str]                  Path to the AWS CLI tool
+    """.stripIndent()
+}
+
+// Show help message
+if (params.help) {
+    helpMessage()
+    exit 0
+}
+
+/*
+ * SET UP CONFIGURATION VARIABLES
+ */
+
+// Check if genome exists in the config file
+if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+    exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file.
Currently the available genomes are ${params.genomes.keySet().join(", ")}" +} + +// TODO nf-core: Add any reference files that are needed +// Configurable reference genomes +// +// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY +// If you want to use the channel below in a process, define the following: +// input: +// file fasta from ch_fasta +// +params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false +if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } + +// Has the run name been specified by the user? +// this has the bonus effect of catching both -name and --name +custom_runName = params.name +if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + custom_runName = workflow.runName +} + +// Check AWS batch settings +if (workflow.profile.contains('awsbatch')) { + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (params.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." +} + +// Stage config files +ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() +ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) +ch_output_docs_images = file("$baseDir/docs/images/", checkIfExists: true) + +/* + * Create a channel for input read files + */ +if (params.input_paths) { + if (params.single_end) { + Channel + .from(params.input_paths) + .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true) ] ] } + .ifEmpty { exit 1, "params.input_paths was empty - no input files supplied" } + .into { ch_read_files_fastqc; ch_read_files_trimming } + } else { + Channel + .from(params.input_paths) + .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true), file(row[1][1], checkIfExists: true) ] ] } + .ifEmpty { exit 1, "params.input_paths was empty - no input files supplied" } + .into { ch_read_files_fastqc; ch_read_files_trimming } + } +} else { + Channel + .fromFilePairs(params.input, size: params.single_end ? 1 : 2) + .ifEmpty { exit 1, "Cannot find any reads matching: ${params.input}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --single_end on the command line." } + .into { ch_read_files_fastqc; ch_read_files_trimming } +} + +// Header log info +log.info nfcoreHeader() +def summary = [:] +if (workflow.revision) summary['Pipeline Release'] = workflow.revision +summary['Run Name'] = custom_runName ?: workflow.runName +// TODO nf-core: Report custom parameters here +summary['Input'] = params.input +summary['Fasta Ref'] = params.fasta +summary['Data Type'] = params.single_end ? 
'Single-End' : 'Paired-End' +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +summary['Output dir'] = params.outdir +summary['Launch dir'] = workflow.launchDir +summary['Working dir'] = workflow.workDir +summary['Script dir'] = workflow.projectDir +summary['User'] = workflow.userName +if (workflow.profile.contains('awsbatch')) { + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue + summary['AWS CLI'] = params.awscli +} +summary['Config Profile'] = workflow.profile +if (params.config_profile_description) summary['Config Profile Description'] = params.config_profile_description +if (params.config_profile_contact) summary['Config Profile Contact'] = params.config_profile_contact +if (params.config_profile_url) summary['Config Profile URL'] = params.config_profile_url +summary['Config Files'] = workflow.configFiles.join(', ') +if (params.email || params.email_on_fail) { + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC maxsize'] = params.max_multiqc_email_size +} +log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") +log.info "-\033[2m--------------------------------------------------\033[0m-" + +// Check the hostnames against configured profiles +checkHostname() + +Channel.from(summary.collect{ [it.key, it.value] }) + .map { k,v -> "
<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>" }
+    .reduce { a, b -> return [a, b].join("\n            ") }
+    .map { x -> """
+    id: 'nf-core-proteogenomics-summary'
+    description: " - this information is collected when the pipeline is started."
+    section_name: 'nf-core/proteogenomics Workflow Summary'
+    section_href: 'https://github.com/nf-core/proteogenomics'
+    plot_type: 'html'
+    data: |
+        <dl class=\"dl-horizontal\">
+            $x
+        </dl>
+ """.stripIndent() } + .set { ch_workflow_summary } + +/* + * Parse software version numbers + */ +process get_software_versions { + publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } + + output: + file 'software_versions_mqc.yaml' into ch_software_versions_yaml + file "software_versions.csv" + + script: + // TODO nf-core: Get all tools to print their version number here + """ + echo $workflow.manifest.version > v_pipeline.txt + echo $workflow.nextflow.version > v_nextflow.txt + fastqc --version > v_fastqc.txt + multiqc --version > v_multiqc.txt + scrape_software_versions.py &> software_versions_mqc.yaml + """ +} + +/* + * STEP 1 - FastQC + */ +process fastqc { + tag "$name" + label 'process_medium' + publishDir "${params.outdir}/fastqc", mode: params.publish_dir_mode, + saveAs: { filename -> + filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" + } + + input: + set val(name), file(reads) from ch_read_files_fastqc + + output: + file "*_fastqc.{zip,html}" into ch_fastqc_results + + script: + """ + fastqc --quiet --threads $task.cpus $reads + """ +} + +/* + * STEP 2 - MultiQC + */ +process multiqc { + publishDir "${params.outdir}/MultiQC", mode: params.publish_dir_mode + + input: + file (multiqc_config) from ch_multiqc_config + file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) + // TODO nf-core: Add in log files from your new processes for MultiQC to find! + file ('fastqc/*') from ch_fastqc_results.collect().ifEmpty([]) + file ('software_versions/*') from ch_software_versions_yaml.collect() + file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") + + output: + file "*multiqc_report.html" into ch_multiqc_report + file "*_data" + file "multiqc_plots" + + script: + rtitle = custom_runName ? "--title \"$custom_runName\"" : '' + rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' + // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time + """ + multiqc -f $rtitle $rfilename $custom_config_file . 
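+    # As a sketch of the TODO above: once the pipeline's final tool set is known,
+    # restricting MultiQC to the modules in use (e.g. adding `-m fastqc`) can
+    # make report generation faster.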
+ """ +} + +/* + * STEP 3 - Output Description HTML + */ +process output_documentation { + publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode + + input: + file output_docs from ch_output_docs + file images from ch_output_docs_images + + output: + file "results_description.html" + + script: + """ + markdown_to_html.py $output_docs -o results_description.html + """ +} + +/* + * Completion e-mail notification + */ +workflow.onComplete { + + // Set up the e-mail variables + def subject = "[nf-core/proteogenomics] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[nf-core/proteogenomics] FAILED: $workflow.runName" + } + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = custom_runName ?: workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary + email_fields['summary']['Date Started'] = workflow.start + email_fields['summary']['Date Completed'] = workflow.complete + email_fields['summary']['Pipeline script file path'] = workflow.scriptFile + email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + email_fields['summary']['Nextflow Version'] = workflow.nextflow.version + email_fields['summary']['Nextflow Build'] = workflow.nextflow.build + email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size) + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = ch_multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList) { + log.warn "[nf-core/proteogenomics] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/proteogenomics] Could not attach MultiQC report to summary email" + } + + // Check if we are only sending emails on failure + email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$baseDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$baseDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] + def sf = new 
File("$baseDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "[nf-core/proteogenomics] Sent summary e-mail to $email_address (sendmail)" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "[nf-core/proteogenomics] Sent summary e-mail to $email_address (mail)" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; + + if (workflow.stats.ignoredCount > 0 && workflow.success) { + log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" + log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" + log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" + } + + if (workflow.success) { + log.info "-${c_purple}[nf-core/proteogenomics]${c_green} Pipeline completed successfully${c_reset}-" + } else { + checkHostname() + log.info "-${c_purple}[nf-core/proteogenomics]${c_red} Pipeline completed with errors${c_reset}-" + } + +} + + +def nfcoreHeader() { + // Log colors ANSI codes + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_white = params.monochrome_logs ? '' : "\033[0;37m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + + return """ -${c_dim}--------------------------------------------------${c_reset}- + ${c_green},--.${c_black}/${c_green},-.${c_reset} + ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} + ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} + ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} + ${c_green}`._,._,\'${c_reset} + ${c_purple} nf-core/proteogenomics v${workflow.manifest.version}${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- + """.stripIndent() +} + +def checkHostname() { + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" + if (params.hostnames) { + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..46a291c --- /dev/null +++ b/nextflow.config @@ -0,0 +1,153 @@ +/* + * ------------------------------------------------- + * nf-core/proteogenomics Nextflow config file + * ------------------------------------------------- + * Default config options for all environments. + */ + +// Global default params, used in configs +params { + + // Workflow flags + // TODO nf-core: Specify your pipeline's command line flags + genome = false + input = "data/*{1,2}.fastq.gz" + single_end = false + outdir = './results' + publish_dir_mode = 'copy' + + // Boilerplate options + name = false + multiqc_config = false + email = false + email_on_fail = false + max_multiqc_email_size = 25.MB + plaintext_email = false + monochrome_logs = false + help = false + igenomes_base = 's3://ngi-igenomes/igenomes/' + tracedir = "${params.outdir}/pipeline_info" + igenomes_ignore = false + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = false + config_profile_description = false + config_profile_contact = false + config_profile_url = false + + // Defaults only, expecting to be overwritten + max_memory = 128.GB + max_cpus = 16 + max_time = 240.h + +} + +// Container slug. Stable releases should specify release tag! +// Developmental code should specify :dev +process.container = 'nfcore/proteogenomics:dev' + +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +profiles { + conda { process.conda = "$baseDir/environment.yml" } + debug { process.beforeScript = 'echo $HOSTNAME' } + docker { + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. + // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. 
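+        // For context: the option below maps the container user to the host user
+        // (the same effect as running `docker run -u $(id -u):$(id -g) ...` by
+        // hand), so files written to mounted volumes are owned by the invoking
+        // user rather than root.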
+        docker.runOptions = '-u \$(id -u):\$(id -g)'
+    }
+    singularity {
+        singularity.enabled = true
+        singularity.autoMounts = true
+    }
+    podman {
+        podman.enabled = true
+    }
+    test { includeConfig 'conf/test.config' }
+}
+
+// Load igenomes.config if required
+if (!params.igenomes_ignore) {
+    includeConfig 'conf/igenomes.config'
+}
+
+// Export these variables to prevent local Python/R libraries from conflicting with those in the container
+env {
+    PYTHONNOUSERSITE = 1
+    R_PROFILE_USER = "/.Rprofile"
+    R_ENVIRON_USER = "/.Renviron"
+}
+
+// Capture exit codes from upstream processes when piping
+process.shell = ['/bin/bash', '-euo', 'pipefail']
+
+timeline {
+    enabled = true
+    file = "${params.tracedir}/execution_timeline.html"
+}
+report {
+    enabled = true
+    file = "${params.tracedir}/execution_report.html"
+}
+trace {
+    enabled = true
+    file = "${params.tracedir}/execution_trace.txt"
+}
+dag {
+    enabled = true
+    file = "${params.tracedir}/pipeline_dag.svg"
+}
+
+manifest {
+    name = 'nf-core/proteogenomics'
+    author = 'Sheynkman Group and Smith Group'
+    homePage = 'https://github.com/nf-core/proteogenomics'
+    description = 'A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteogenomics'
+    mainScript = 'main.nf'
+    nextflowVersion = '>=19.10.0'
+    version = '1.0dev'
+}
+
+// Function to ensure that resource requirements don't go beyond
+// a maximum limit
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
+                return params.max_memory as nextflow.util.MemoryUnit
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
+                return params.max_time as nextflow.util.Duration
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}
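For reference, `check_max` is what the `max_memory` / `max_cpus` / `max_time` params above feed into. A sketch of a typical call site (in nf-core pipelines this usually lives in `conf/base.config`, which is not shown in this excerpt) would be:

```nextflow
// Scale requests with each automatic retry, but never exceed the configured caps
process {
    cpus   = { check_max( 2    * task.attempt, 'cpus'   ) }
    memory = { check_max( 7.GB * task.attempt, 'memory' ) }
    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
}
```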
diff --git a/nextflow_schema.json b/nextflow_schema.json
new file mode 100644
index 0000000..fcdcc24
--- /dev/null
+++ b/nextflow_schema.json
@@ -0,0 +1,259 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/nf-core/proteogenomics/master/nextflow_schema.json",
+    "title": "nf-core/proteogenomics pipeline parameters",
+    "description": "A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteogenomics",
+    "type": "object",
+    "definitions": {
+        "input_output_options": {
+            "title": "Input/output options",
+            "type": "object",
+            "fa_icon": "fas fa-terminal",
+            "description": "Define where the pipeline should find input data and save output data.",
+            "required": [
+                "input"
+            ],
+            "properties": {
+                "input": {
+                    "type": "string",
+                    "fa_icon": "fas fa-dna",
+                    "description": "Input FastQ files.",
+                    "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired-end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
+                },
+                "single_end": {
+                    "type": "boolean",
+                    "description": "Specifies that the input is single-end reads.",
+                    "fa_icon": "fas fa-align-center",
+                    "help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run."
+                },
+                "outdir": {
+                    "type": "string",
+                    "description": "The output directory where the results will be saved.",
+                    "default": "./results",
+                    "fa_icon": "fas fa-folder-open"
+                },
+                "email": {
+                    "type": "string",
+                    "description": "Email address for completion summary.",
+                    "fa_icon": "fas fa-envelope",
+                    "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
+                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
+                }
+            }
+        },
+        "reference_genome_options": {
+            "title": "Reference genome options",
+            "type": "object",
+            "fa_icon": "fas fa-dna",
+            "description": "Options for the reference genome indices used to align reads.",
+            "properties": {
+                "genome": {
+                    "type": "string",
+                    "description": "Name of iGenomes reference.",
+                    "fa_icon": "fas fa-book",
+                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+                },
+                "fasta": {
+                    "type": "string",
+                    "fa_icon": "fas fa-font",
+                    "description": "Path to FASTA genome file.",
+                    "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-built index if possible."
+                },
+                "igenomes_base": {
+                    "type": "string",
+                    "description": "Directory / URL base for iGenomes references.",
+                    "default": "s3://ngi-igenomes/igenomes/",
+                    "fa_icon": "fas fa-cloud-download-alt",
+                    "hidden": true
+                },
+                "igenomes_ignore": {
+                    "type": "boolean",
+                    "description": "Do not load the iGenomes reference config.",
+                    "fa_icon": "fas fa-ban",
+                    "hidden": true,
+                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+ } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "hidden": true, + "fa_icon": "fas fa-question-circle" + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "hidden": true, + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ] + }, + "name": { + "type": "string", + "description": "Workflow name.", + "fa_icon": "fas fa-fingerprint", + "hidden": true, + "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true, + "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true, + "help_text": "Set to receive plain-text e-mails instead of HTML formatted." + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true, + "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true, + "help_text": "Set to disable colourful command line output and live life in monochrome." 
+ }, + "multiqc_config": { + "type": "string", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog", + "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. 
If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", + "fa_icon": "fas fa-users-cog" + }, + "hostnames": { + "type": "string", + "description": "Institutional configs hostname.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + } + ] +} From 137aa415363fc2701d1d601c11e59a32bcdee8a8 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 16:57:03 -0500 Subject: [PATCH 02/36] Cleans up template main.nf and adds swag cli message --- main.nf | 375 ++++++-------------------------------------------------- 1 file changed, 35 insertions(+), 340 deletions(-) diff --git a/main.nf b/main.nf index b2e94a9..30fac88 100644 --- a/main.nf +++ b/main.nf @@ -1,24 +1,24 @@ #!/usr/bin/env nextflow /* ======================================================================================== - nf-core/proteogenomics + sheynkman-lab/Long-Read-Proteogenomics ======================================================================================== - nf-core/proteogenomics Analysis Pipeline. + sheynkman-lab/Long-Read-Proteogenomics Analysis Pipeline. 
#### Homepage / Documentation - https://github.com/nf-core/proteogenomics + https://github.com/sheynkman-lab/Long-Read-Proteogenomics ---------------------------------------------------------------------------------------- */ def helpMessage() { // TODO nf-core: Add to this help message with new command line parameters - log.info nfcoreHeader() + log.info logHeader() log.info""" Usage: The typical command for running the pipeline is as follows: - nextflow run nf-core/proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker + nextflow run sheynkman-lab/Long-Read-Proteogenomics --input -profile docker Mandatory arguments: --input [file] Path to input data (must be surrounded with quotes) @@ -26,7 +26,6 @@ def helpMessage() { Available: conda, docker, singularity, test, awsbatch, and more Options: - --genome [str] Name of iGenomes reference --single_end [bool] Specifies that the input is single-end reads References If not specified in the configuration file or you wish to overwrite any of the references @@ -57,340 +56,41 @@ if (params.help) { * SET UP CONFIGURATION VARIABLES */ -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} - -// TODO nf-core: Add any reference files that are needed -// Configurable reference genomes -// -// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY -// If you want to use the channel below in a process, define the following: -// input: -// file fasta from ch_fasta -// -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } - -// Has the run name been specified by the user? -// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - -// Check AWS batch settings -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (params.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." -} - -// Stage config files -ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) -ch_output_docs_images = file("$baseDir/docs/images/", checkIfExists: true) - -/* - * Create a channel for input read files - */ -if (params.input_paths) { - if (params.single_end) { - Channel - .from(params.input_paths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.input_paths was empty - no input files supplied" } - .into { ch_read_files_fastqc; ch_read_files_trimming } - } else { - Channel - .from(params.input_paths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true), file(row[1][1], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.input_paths was empty - no input files supplied" } - .into { ch_read_files_fastqc; ch_read_files_trimming } - } -} else { - Channel - .fromFilePairs(params.input, size: params.single_end ? 1 : 2) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.input}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --single_end on the command line." } - .into { ch_read_files_fastqc; ch_read_files_trimming } -} // Header log info -log.info nfcoreHeader() +log.info logHeader() def summary = [:] if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName // TODO nf-core: Report custom parameters here -summary['Input'] = params.input -summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End' +if (params.input) summary['Input'] = params.input summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName if (workflow.profile.contains('awsbatch')) { summary['AWS Region'] = params.awsregion summary['AWS Queue'] = params.awsqueue summary['AWS CLI'] = params.awscli } summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Profile Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Profile Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config Profile URL'] = params.config_profile_url -summary['Config Files'] = workflow.configFiles.join(', ') -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size -} log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" -// Check the hostnames against configured profiles -checkHostname() - -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "
<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }
-    .reduce { a, b -> return [a, b].join("\n            ") }
-    .map { x -> """
-    id: 'nf-core-proteogenomics-summary'
-    description: " - this information is collected when the pipeline is started."
-    section_name: 'nf-core/proteogenomics Workflow Summary'
-    section_href: 'https://github.com/nf-core/proteogenomics'
-    plot_type: 'html'
-    data: |
-        <dl class=\"dl-horizontal\">
-        $x
-        </dl>
- """.stripIndent() } - .set { ch_workflow_summary } - /* - * Parse software version numbers + * STEP - validate template */ -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } - output: - file 'software_versions_mqc.yaml' into ch_software_versions_yaml - file "software_versions.csv" - - script: - // TODO nf-core: Get all tools to print their version number here - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} - -/* - * STEP 1 - FastQC - */ -process fastqc { - tag "$name" - label 'process_medium' - publishDir "${params.outdir}/fastqc", mode: params.publish_dir_mode, - saveAs: { filename -> - filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" - } - - input: - set val(name), file(reads) from ch_read_files_fastqc +process validate { + publishDir "${params.outdir}/validate/", mode: params.publish_dir_mode output: - file "*_fastqc.{zip,html}" into ch_fastqc_results + file "validated.txt" script: """ - fastqc --quiet --threads $task.cpus $reads + touch validated.txt """ } -/* - * STEP 2 - MultiQC - */ -process multiqc { - publishDir "${params.outdir}/MultiQC", mode: params.publish_dir_mode - - input: - file (multiqc_config) from ch_multiqc_config - file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - // TODO nf-core: Add in log files from your new processes for MultiQC to find! - file ('fastqc/*') from ch_fastqc_results.collect().ifEmpty([]) - file ('software_versions/*') from ch_software_versions_yaml.collect() - file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") - - output: - file "*multiqc_report.html" into ch_multiqc_report - file "*_data" - file "multiqc_plots" - - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' - // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time - """ - multiqc -f $rtitle $rfilename $custom_config_file . 
- """ -} - -/* - * STEP 3 - Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode - - input: - file output_docs from ch_output_docs - file images from ch_output_docs_images - - output: - file "results_description.html" - - script: - """ - markdown_to_html.py $output_docs -o results_description.html - """ -} - -/* - * Completion e-mail notification - */ -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/proteogenomics] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/proteogenomics] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size) - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/proteogenomics] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/proteogenomics] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new 
File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/proteogenomics] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "[nf-core/proteogenomics] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/proteogenomics]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/proteogenomics]${c_red} Pipeline completed with errors${c_reset}-" - } - -} - - -def nfcoreHeader() { +def logHeader() { // Log colors ANSI codes c_black = params.monochrome_logs ? '' : "\033[0;30m"; c_blue = params.monochrome_logs ? '' : "\033[0;34m"; @@ -403,33 +103,28 @@ def nfcoreHeader() { c_yellow = params.monochrome_logs ? 
'' : "\033[0;33m"; return """ -${c_dim}--------------------------------------------------${c_reset}- - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/proteogenomics v${workflow.manifest.version}${c_reset} + ${c_cyan} sheynkman-lab/Long-Read-Proteogenomics v${workflow.manifest.version}${c_reset} + ${c_cyan}███████╗██╗ ██╗███████╗██╗ ██╗███╗ ██╗██╗ ██╗███╗ ███╗ █████╗ ███╗ ██╗ ██╗ █████╗ ██████╗ ${c_reset} + ${c_cyan}██╔════╝██║ ██║██╔════╝╚██╗ ██╔╝████╗ ██║██║ ██╔╝████╗ ████║██╔══██╗████╗ ██║ ██║ ██╔══██╗██╔══██╗ ${c_reset} + ${c_cyan}███████╗███████║█████╗ ╚████╔╝ ██╔██╗ ██║█████╔╝ ██╔████╔██║███████║██╔██╗ ██║█████╗██║ ███████║██████╔╝ ${c_reset} + ${c_cyan}╚════██║██╔══██║██╔══╝ ╚██╔╝ ██║╚██╗██║██╔═██╗ ██║╚██╔╝██║██╔══██║██║╚██╗██║╚════╝██║ ██╔══██║██╔══██╗ ${c_reset} + ${c_cyan}███████║██║ ██║███████╗ ██║ ██║ ╚████║██║ ██╗██║ ╚═╝ ██║██║ ██║██║ ╚████║ ███████╗██║ ██║██████╔╝ ${c_reset} + ${c_cyan}╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝╚═════╝ ${c_reset} + ${c_cyan} ${c_reset} + ${c_cyan}██╗ ██████╗ ███╗ ██╗ ██████╗ ██████╗ ███████╗ █████╗ ██████╗ ${c_reset} + ${c_cyan}██║ ██╔═══██╗████╗ ██║██╔════╝ ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ${c_reset} + ${c_cyan}██║ ██║ ██║██╔██╗ ██║██║ ███╗ ██████╔╝█████╗ ███████║██║ ██║ ${c_reset} + ${c_cyan}██║ ██║ ██║██║╚██╗██║██║ ██║ ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ${c_reset} + ${c_cyan}███████╗╚██████╔╝██║ ╚████║╚██████╔╝ ██║ ██║███████╗██║ ██║██████╔╝ ${c_reset} + ${c_cyan}╚══════╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ${c_reset} + ${c_cyan} ${c_reset} + ${c_cyan}██████╗ ██████╗ ██████╗ ████████╗███████╗ ██████╗ ██████╗ ███████╗███╗ ██╗ ██████╗ ███╗ ███╗██╗ ██████╗███████╗ ${c_reset} + ${c_cyan}██╔══██╗██╔══██╗██╔═══██╗╚══██╔══╝██╔════╝██╔═══██╗██╔════╝ ██╔════╝████╗ ██║██╔═══██╗████╗ ████║██║██╔════╝██╔════╝ ${c_reset} + ${c_cyan}██████╔╝██████╔╝██║ ██║ ██║ █████╗ ██║ ██║██║ ███╗█████╗ ██╔██╗ ██║██║ ██║██╔████╔██║██║██║ ███████╗ ${c_reset} + ${c_cyan}██╔═══╝ ██╔══██╗██║ ██║ ██║ ██╔══╝ ██║ ██║██║ ██║██╔══╝ ██║╚██╗██║██║ ██║██║╚██╔╝██║██║██║ ╚════██║ ${c_reset} + ${c_cyan}██║ ██║ ██║╚██████╔╝ ██║ ███████╗╚██████╔╝╚██████╔╝███████╗██║ ╚████║╚██████╔╝██║ ╚═╝ ██║██║╚██████╗███████║ ${c_reset} + ${c_cyan}╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═════╝╚══════╝ ${c_reset} + ${c_cyan} -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} From eac977e641972b018387a0c58df188320238b0a1 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:05:32 -0500 Subject: [PATCH 03/36] Updates nextflow.config --- nextflow.config | 60 ++++++++----------------------------------------- 1 file changed, 9 insertions(+), 51 deletions(-) diff --git a/nextflow.config b/nextflow.config index 46a291c..4534b54 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,6 +1,6 @@ /* * ------------------------------------------------- - * nf-core/proteogenomics Nextflow config file + * sheynkman-lab/Long-Read-Proteogenomics Nextflow config file * ------------------------------------------------- * Default config options for all environments. */ @@ -9,31 +9,11 @@ params { // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags - genome = false - input = "data/*{1,2}.fastq.gz" - single_end = false outdir = './results' publish_dir_mode = 'copy' // Boilerplate options - name = false - multiqc_config = false - email = false - email_on_fail = false - max_multiqc_email_size = 25.MB - plaintext_email = false - monochrome_logs = false help = false - igenomes_base = 's3://ngi-igenomes/igenomes/' - tracedir = "${params.outdir}/pipeline_info" - igenomes_ignore = false - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false // Defaults only, expecting to be overwritten max_memory = 128.GB @@ -44,29 +24,12 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/proteogenomics:dev' - -// Load base.config by default for all pipelines -includeConfig 'conf/base.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +process.container = 'sheynkmanlab/proteogenomics-base:dev' profiles { - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { - docker.enabled = true - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. 
- docker.runOptions = '-u \$(id -u):\$(id -g)' - } + docker { + docker.enabled = true + } singularity { singularity.enabled = true singularity.autoMounts = true @@ -77,11 +40,6 @@ profiles { test { includeConfig 'conf/test.config' } } -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} - // Export these variables to prevent local Python/R libraries from conflicting with those in the container env { PYTHONNOUSERSITE = 1 @@ -110,10 +68,10 @@ dag { } manifest { - name = 'nf-core/proteogenomics' - author = 'enomicsSheynkman Group and Smith Group' - homePage = 'https://github.com/nf-core/proteogenomics' - description = 'A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteog' + name = 'sheynkman-lab/Long-Read-Proteogenomics' + author = 'Sheynkman Group and Smith Group' + homePage = 'https://github.com/sheynkman-lab/Long-Read-Proteogenomics' + description = 'A workflow for delineating the Human Proteome at Isoform Resolution through Long-read Proteog' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' version = '1.0dev' From 6bcc81afd14afee3c3ae73671787c5b0374fdd48 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:11:20 -0500 Subject: [PATCH 04/36] Adds Dockerfile and env yaml updates --- Dockerfile | 22 +++++++++++++++++----- environment.yml | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 883a854..9908de1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,29 @@ -FROM nfcore/base:1.11 -LABEL authors="enomicsSheynkman Group and Smith Group" \ - description="Docker image containing all software requirements for the nf-core/proteogenomics pipeline" +# nf-core style template for Dockerfile +FROM continuumio/miniconda3:4.8.2 +LABEL description="Base docker image containing util software requirements for the sheynkman-lab/Long-Read-Proteogenomics pipeline" + +# Install procps so that Nextflow can poll CPU usage and +# deep clean the apt cache to reduce image/layer size +RUN apt-get update \ + && apt-get install -y procps \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* # Install the conda environment COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-proteogenomics-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/proteogenomics-base/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-proteogenomics-1.0dev > nf-core-proteogenomics-1.0dev.yml +RUN conda env export --name proteogenomics-base > proteogenomics-base.yml # Instruct R processes to use these empty files instead of clashing with a local version RUN touch .Rprofile RUN touch .Renviron + +# Copy additonal scripts from bin and add to PATH +RUN mkdir /opt/bin +COPY bin/* /opt/bin/ +RUN chmod +x /opt/bin/* +ENV PATH="$PATH:/opt/bin/" \ No newline at end of file diff --git a/environment.yml b/environment.yml index a588315..bcbcb35 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-proteogenomics-1.0dev +name: proteogenomics-base channels: - conda-forge - bioconda From 90758364031fb821d934057fac8d2611310b5be2 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:12:10 -0500 Subject: [PATCH 05/36] Removes redundant files 
from assets --- assets/email_template.html | 54 ------------------------- assets/email_template.txt | 40 ------------------ assets/multiqc_config.yaml | 11 ----- assets/nf-core-proteogenomics_logo.png | Bin 10806 -> 0 bytes assets/sendmail_template.txt | 53 ------------------------ 5 files changed, 158 deletions(-) delete mode 100644 assets/email_template.html delete mode 100644 assets/email_template.txt delete mode 100644 assets/multiqc_config.yaml delete mode 100644 assets/nf-core-proteogenomics_logo.png delete mode 100644 assets/sendmail_template.txt diff --git a/assets/email_template.html b/assets/email_template.html deleted file mode 100644 index 8e885e7..0000000 --- a/assets/email_template.html +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - nf-core/proteogenomics Pipeline Report - - -
- - - -

nf-core/proteogenomics v${version}

-

Run Name: $runName

- -<% if (!success){ - out << """ -
-

nf-core/proteogenomics execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

-

The full error message was:

-
${errorReport}
-
- """ -} else { - out << """ -
- nf-core/proteogenomics execution completed successfully! -
- """ -} -%> - -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
$commandLine
- -

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> "" }.join("\n") %> - -
$k
$v
- -

nf-core/proteogenomics

-

https://github.com/nf-core/proteogenomics

- -
- - - diff --git a/assets/email_template.txt b/assets/email_template.txt deleted file mode 100644 index c38b428..0000000 --- a/assets/email_template.txt +++ /dev/null @@ -1,40 +0,0 @@ ----------------------------------------------------- - ,--./,-. - ___ __ __ __ ___ /,-._.--~\\ - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,' - nf-core/proteogenomics v${version} ----------------------------------------------------- - -Run Name: $runName - -<% if (success){ - out << "## nf-core/proteogenomics execution completed successfully! ##" -} else { - out << """#################################################### -## nf-core/proteogenomics execution completed unsuccessfully! ## -#################################################### -The exit status of the task that caused the workflow execution to fail was: $exitStatus. -The full error message was: - -${errorReport} -""" -} %> - - -The workflow was completed at $dateComplete (duration: $duration) - -The command used to launch the workflow was as follows: - - $commandLine - - - -Pipeline Configuration: ------------------------ -<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> - --- -nf-core/proteogenomics -https://github.com/nf-core/proteogenomics diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 02f0a9e..0000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/proteogenomics - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-proteogenomics-summary: - order: -1001 - -export_plots: true diff --git a/assets/nf-core-proteogenomics_logo.png b/assets/nf-core-proteogenomics_logo.png deleted file mode 100644 index 138594026753e7e9779e06bb7b1abe8b443f34a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10806 zcmb`NRZtvEu(ok`cUT|(C+RtG zx@AYB!({_e;1aK}@~w{b!~U{nD39qS>l|PiuxIpVi9bgu*UV%01(`*Ks}h+Uf0|Nf z6Dv>atl$xDc4GUD4<6!)yAzaR+oRv8>SJLoaSigi_po^_Jh0QR2~AB61|RGsGNiW-ZT(ynjJobB$xfw7)>&Hms{IRH zB;THK^J?p`MMTZ^)9xozk=+}Wy;+`4q2%gfk(eqn=}q|h?cdzW9Q{W>)JfUI*okec z@aY2A0$K7O5|!Ie$V(q^;rZ2WZMES_KWUbJ)UNrRXZOTBZM?i@yoUfkIvb*@W`vvu{XAg3;t7G)85C$04uU5Tvgy1(h?*X&*fdzK3gZN%{0V%7r<)UuPS^04n*_8)Q$;{1R`eN2B`U zJBhsek&X+l#!fS?7Ng9y0n273-^?EQ z0-@frHFXL8C}`kdzRjuVej(B6PeZ}Pc8=fmG>kQkf2?)n8wp+4iZ_+rGj;QOI--tQ zS^o-@-DXyZ@|fK_(vD=d^3>UG1YdW{Xj8l99 zh=3z~sXWaa_REe!Q^+na$2<31pgjYBr)Z6Kw$)Ms?le?Zc6!B$Vkm<7x`(^w7pdtJ z6bcp{BN^ML0l_sj*NKuOYLCiSmi*OE{50q<8TD7a#5vIS^K9*>e8<>+VTrw6{H8Oz zu2-s&tJWK)tZ}YhBv7qr+Aa;HLh{%~o4+^z z8z}Y@ve4lQ2jyd(|MOt(_Gs@A&!L);uXdO=M%f^ODhmf~LUFLXNUQ5=qqBSMR7WmY z%Mb2IsMZ$OB$#xX&%n3r1?L4516z1Wvp^G0Fx?AfU?*}S7MHL6Vf4>AKkxQ9bV$3E zrTW0PV2Y%Iw=yj!Y#RvAv~|cd&C)ldMG0zY53ny*aP`~SD2`t84)|KKYd?mnK*zbz zv$XuY7Uzo)Cfc_iN4sk8U;G}(ozWw#r@GJXyt?f5d8#oAi+moUx^w{`mK`A&u^}=5!@oc%8;L60m(yo%^%H4++Ta2di9)I* zco(6YyXCwsbq1WLj|fHKQ0)F75;bORa(>KkH;KC)L^LJQ8%vHM%*G4^ofjg0+d4hU|Yi z-=RYmp?Ok`q6?8#$?`C+ui41+2h+*scb|FuG%4NY?K!q{NyIy*l4GLQj$h_*=8Yd- z$*#Xp-WXR()ch`z(KFb_dsY5i-k!4V8pcIYOB;2SY8(fqG~;%ouV?J>Ti{VFY`=Uh1K+)Ea@g49c|N6Uap)LjZaOBTwFEs7Yal^R9)(-XQd^gqYg2;_(uhNe7gM9g+T zJ!^ll*O)A=m2ytBKipiq`qB)QWulZn3+enDc{VU)QlAPVk5nqO<@_a`*!-sw4Q+i?p zvu`E+_gsTQdcSj%Dbz!ml4k62>5sFn!H0r z>q#i{LK}=`cVZc?Su4L$7fbgjW%a^ERA|Yw?4&5#g=!8cK$Kw`+&m|vUq+fgW}WTJ zdhvN9N!CTA{bV3IalhXN@2#2$XeWsewd_~uN+B=w9St{#ujkKiIc2&X5($ylbhxZ4 
z+_qxpBTh*$tCW)R_cHx3#kS1K`VJ`JuWG$IKPop3;K^QzG;$!!{!RO<(U~9~E@e{v zmWLMsJ}sNfK}{yCQ1Epcl?dge44tTUZ3Hk?aI zmS$9E=DpU^?4#&~8`BiKX#5$pPq)aBxPc@G5<_ls6N=r`;q16V5JbR*bA;TA6W?L% z7CcDwg%C-2&moFMh$#xrZip(CtVg2F>P2IeZ>QK|Z@KpQ;$znGS-kT`ag%tl=0z4t z>R+vS%s$~{3}NG%?w0)7WlO4Acuj(hL8Tb{N-3HMwTQ`yjL`mR&2#b*-A>4{U#NoZ z%Vt61Y06{%u}SW~@!Le(n;T2_v++L*46%_pubQ^EjqxunCm&3Li)Q&2g} z$vFR*!={<^5LvQPG3z?t&Nh{W+c$|zJC&tYiNdGtWZ(J8G3jFLOL*7c$FArHo>8H*(U`d4aT#U!b+nE>^9`8|OrJi zX1R{z_jl?a23OIk!uYy7*cFH_DiD_N*}tlnyTM-|oE<;{FXk>C+r?>QiGFvK#HNG$ zpQr~PMw1CyfMGxsoH5Z%h0zMEeH*G1#mX@+1ui{wew_K8_5jW#k<*@ly_|zNt`yPh zvL#=id6yBZc0%kuHK*O>lZoz`rDW~1d-ue=ra3={bOuLx&0uuJ{4##bTqEc)!qG!a zQ|9;a3}d%dWaYB*o6_)cjkotC*cOT2r)lkT1gVMdavTE*j{V)*?^lwrlhu&Wiw=r% z7G20}>x`PFV(r0UWkl8J98^@~{V^PUEu#J7^U53ukZL_fc{+XHMjS>zqp7MkXOJp7 zuhTSO;NU|Ruk+q>mUYvlVGD8(3n~`JyWHPkDow$m!0#9t`6mgYrSE6VBpDaKRVCvT z^XQ<-E*Ih4cmRR%j(7*PusWth6V1vx;tl+f)#N`VqR~PRei$xa!WecBu%anhRwB@< z{0sw_st=)!Q{EYM1-$E`T_Ks!JWr$IE?-kmje*n~VM+4#PnD{MuYZW+3F2(|j>Hv& z|I1Eq*MnLwe?#`bPuD`i4*Jb+p1D5D9wpkDIC;Ux8-?)I%h$DEUVDsT9ki9F+xNd| z#~V2a=FlSZDw%PcZZ;Q$F;`jx)jJGrSDw??uSkVP7dG$5R z#v(7YKCP^%%r%~0dcG)7eJ7f8QDEUs!0aEbJRWW2vsNqp=$-BW~Oy(fFK-C{F#754{-5Vwjy0BcXocl)5o;!v$<9O}r|@ zImS$nn>dC1k`I!iYFmh0Mys>b2^a~#)*wKN=>8}OHH&?auGx!P}f^L zsFwp`2lbZpTKryi<2amGrSOy$ul!0yob;>to-Fzl#Q?2KzWwKqb=ZjpIXPzY$b7ql zn!*fdj3^C(L!DS$hjV8-T^S~yK3QS?m!9Nus(Rt)bjje^yRuJ%Yb^)9f=%a}*6lsd z%w@$~`hs*~x@z=}CL}ou7}*FI{jQ@`nn0VnsJq-hnpPC?> zeuY&o(fB^jZZPYrg%A38Gg5SyWDI6exLy3KVcc4R%UxuZ66-OZ>tY+MNLN-iJ$be` z93%79i41&Fpv7bf4&e4F2i^lH^-UX?{03_Z$OIRxrfnJqHN>zZY5qhaBCA5oF<*AjgOPpZprQmWH)sU(S`=*`fY{Jq$I24r3Susp)j^}eYD9V{HCTeH z$0bhXUG85Wntgi6*`=&kTi6Jan%EiZhX_Qi4P$FDwZeJ5ko`AdKYKdN&ct#RwB0i^ zX-c^97gySbWX580?#EQs<*f@J`k=ok-dbFqB?2pI)P>?SF3f2_nZSDa$dX3w4lujYIX{m%3)?iJ-je$8ouH#&(`AvAu=>b5>{4b62;h_sw^MwwP5<&i zpXHKe9KWn>%<{K*$mr#Fn&6`&M)*~c_C2#&W)N_gBXdAK{vE^*NvT!z-|@ZHKAcql z{*QX;a;B8oMR^VdE<|6;g6(xuLtGEM7mUxS*$7=&%m0`^(_PV-+C~WFZ+zTku_EsL zMLbGB3MH}NZrtYv3zQ~E#4ID8@>!Ao$V|E5X`mfgt*Dmu5vVqr&G#hI16sZNrC8e7~;2dPHdk=S~(bclfM91Vg%ii3k|8nmz7p9Cj~lEf60 z*<({`Yr=fXfW2EC2E!?K{UxZ6y-hmU_S2874G>rwD)z7UTL3koGnTLAdc06ZwF6J> z8fo<6?TY>9n(Jo4rSa|HFHbjvjRvE#Vr(Ydn3Pd`ZY)COE;X>sT=x}WELsNAf@mJ3<oy z9?4*M@@{Qf_(1OP8ru{bY}z=t0C$9G>lduxcYUUEmtP&}zXc`Rwxfo<=RVEWCT1j@ zlrYjc^`7)dTpM!ID&bdW%|7$qSiX7G(XKV-t*cYipqfcAZVgbG1u>Tev}lh-!vNjs zn*cpAmFtaqAh!Kk6O=lAkzDY@(+xpLqQ`tAU=yXJveiB|5q$BWV)f^M+3LL*p(6z4 zIx|wxD;^)V&d@soxgfV-YBkZ;6@7}@A*A1@+Sh5oy={2E%F9!izJq2qK zt*&;}S`Jxe-f5kdSXAOROQ-c?)SsEObbQfj3vhwsYi{O`d6chJc~jQQt=~F3XyVJT zooI7`155zWKVihvI$G=V(cu>67irLM^$0`4fp(x7O{K3}0Z>L&sCSOT4h6 z$y>jZo{2;$4{c|)rMCCSzuRn|vzYjl^RMZ?6A2}Wz${huB`QDN2HGW2t#IgqN^0Le zmzKW~bmHIf?D#A>H|XZrkDxJT(x}|Z>k&|-Yj0RBkUti!J`;Hz^l~3(%Y|0(M$gZ1 z`2uG?NS{!eJT{cKLC=qt78^Z3O+-&QtwRco^kMTtaW@4VY8ucb&zi~huPqURlppYB zZ%8@LEs(+1j(hCUl<`j}mkLJKim`aAj$R&pl3m646Eyd^l>30L&xfFcKm0wx6N0xc zR_$gZ4Kn8f`Lh(ruR4n{jZU5h@D|HC?vUsQQvW?R*UrD0`wZ!ZHEF3sDIw9!($J4D z9YOPFV=5qR^QtS3jPQDd!AAaV1X4W_!jS5U7?O{Vj4qRh59VFpcb=%5t&`Thp@CEx zRlstG9&BTU_YMBrEe7l}2M_(u8$!@)3V*f9EtY&wNRysGXrjwY=W@3jB9z#k%6qa( zk)7QJJ9UTv{2yeE(yR@^8{#b}H)czm>`b40U3wIElkk-;x*#FmHKA4YbuWZlJ(48lk5nfL-UkPkGMh?OHvP$0)9~1@vm9+0;sH~G z{xg2Ar__VENhE5IA*?bY2puxzd-->fbh-oq! 
z%@yC)MO^=Okml|6Ly2oNq?<%3-ovzh3~5RFPVd!ZHJM4p%(U9o%4#3SSK%Uy@vqG7ZH76-CO=RJ#}U(x%_v4!m{L$^!G?GTv|uB9mUk@Rfm*yn4!F)c79 z$n9hbinu6kQR@>9^}RF@Y;0bgLAOitm35OW8|`V6^BJ=EyarzDR(ji1eiO!wl2-j3F1)F=BP`-CZq8%frjl{Pd^p zDgr$sD6k2mf8w=C+~T;Xm4aU&do@!9r_!Tsh@QdTjhjX(K^2gIdH`jsQSSm#>d#mu z!w9h@5yrx@=6H1h^;6?h_)c=~And-cN@^Ig z_P4J^3DMF5;f%IV6>x6rw}zB4B6 zDWiBGYU!o~O*@Vsg`Xhfgj(O{FkW&e2u2x=5V2pL-TEo(#(O2j27{cG77~4gP=(P+ z-I(svV+*n~pco+cR5az5ibx`&8DCO9(vpZGMTF3cXcoUtv3FgP-nsA~H%KCP>Y1T@ z29o%AdzZP%R4Q%^N2P1lJ2r_`dJ+9mXzrZv0;FIqa^c>iBWDzdI+%^rn@HrtJ#~iT zCa{abS`9!GbpdM%e3p;2E^>d(>8B`UbR*Z^jQ7uIi{-@gT&DG_9pU4@a7S5ym!yE; zsARc~O9}w40jz=a*cVmr-d?*DkP-|f(o+1;l98(ie2}3eyI!afC?1j|S7e8ONSwl%E#s1p*j-;6|DK`AZr_{MA z1qNgjX-w68Zt{^{FBAj3J2d3Qn0xWy3SeQx^s`EcoI7?%N0PDZ1tpVIzrqam$jG-e zu@`Z=6Gon5=65H%kf+Ral+rfBJy+f0dg(sj4`3-bihhBsJ zR5l~7EC{7Zv$i(fJlETCWLvf0;DmYTbjoPz=%P*)g^@;%c*JB;LVIL7kVe?Ij+e6~ zz7LG*nwHy_wNr|m-sJ!$gd{Xi{Vvhylp(UjKjS^iPm6XVHGpon1L60?7vTf}I}3?vGw)L_Fmh-vM`2A8ONVH* zMeTi-O3AL&7n71;FApY){$TcvAht7a*>-QGdvUMvbJk7h%l6u88&egxc*8=U0^V?rJl$x{U-A0T@svE6gA zBc2KibzXECQQ}HHf4`h5c+i$oZ8t0AA?ML-_HzF7U!tG-n}Ak>4%k#5UWs0hPWav9 zkr(DQtu8fM$|_W7_Nz5+W7Cs&Hj9|m`-d2UKZ8tx4%TzuTjiV!{h(b&>fifmOLpO5 zqV)S)+?pZaoQf9Dg}`R#FO7tGe}GjeV5`%PrXWs zccK{m!4==6P3_c+_(vzr9NIsNC(7vnIdJwBDfyGAFwPm5Y`Y-Z4WF^%-EE+Ql);;Y z`V-|NQ-9zk`G%*2`4=Tw8k>(K^WUNuZolIEJC*J4L7|Fx1;$699gd~={#zoEa0Wku z!o>WKroJNE$yd|^5g38OJ9M}X-|<2)I<%3MBq}Jk{wBPF-(~8J>nW`ZL?)_+WfAR# zVxb470FHJfZbRqXUb`aoN9v1ivn9-31XrNhZm^yv?9FE+a0K6t^jnx2uOA{;#px21 z!>GtoO|xDmHSi+&6U>L%SqA00As)z0aOYL79V?WnwsUqz#sKpOEY>)DI*u&Zog~<$ zzhLlldp3J#v6J}$oz9H8Dn$Z*+LwBHW6s@5hM0aJN4z!HK+9vw0U04RlEyXMlW`F_ zYxpzvQV2PLPs-+>UWvW%@}Tbaei8=`L>d8;*S8xN$EGAM7s3KZxC&Uhv}f?cR-`?9 zmF-`*Z{O7~FC(z|bG$UhA8hQ%y<`vZ_`t8*1coNE>JLc@Iz(tHd*SSe3<1h-js#_DjVd&cafcN|LLV9vYj$Tp|7eMXZIrR z0hBw+A39pU;O44%TCEE#uj6HL`942T8pN4lu~MFeg<0JDGkv)^;&7dDYxSf>SD(tg zD--^8czOO;0DZX^spVpw0Dv|6GQ}`0CIvYbSG)fkL#6{6D9j&8`+HS9bJng}$%7M* z+F!qr>{0pO{WaPnq+~d|!gJq3qJfDj46?BDgZo7SC>MX&Z8|_5S7$&xHS6%%7=QaL zsGs=D@M1#cWVBkWVi_dmmT?f_!l(3-w~*}6yaf$HGteTm_b%cCM?l=4ZS=0XJOsw? 
z{+12@Fx}^x2DMCa-dro24cYr-s|t@b#~$9@08hCcG*+=Ymad$gC$bKmOiI4SgmuNE zErAV2#FPEM3#2)G8#Jr6+R3^3H8h_kPi$8H*e}tj&EiYGBUtXvOpnRJr6bOvPt9qY zt*2T-p*V0_(|}QY*aOW#*LjZh)Z1Z;q!dM(2?yFovHV4>B%uZ?&*;_C*o06>`-8tD z@%13NN?UfI>0p^ARts=utlXdRU5H7GUy%%svSR7@??@(B;-}+D^d#0C_5nnI707ZnxbN93$O`WSF(j?-=RCZr*WB~)xGmh{pib<@n zmGIQSnl!PKYQo$Lx!)F(c|Rm0j5T~7u`}cUfoZI|Ebs%Dl-nAL&7Cg+$4y))-%Jh~ zV9^fK9U#hRyVu0i3TP^`P}vRxyHh!|le8)n=`PauY|%}Eg?l_Qmu9=6qB2`?TUu?J zrXNhIWQkHs@-wlQZU9ExmI29*EK|egC7?a5C7<^O?P)R;+|XX6JHC1bH@;ndeLB57 zc@Cy(6!;75@XJjEjYKILppADEckU=iDPV4^{+lDi6PSne*rN4kVdCqd3!z$cv)Ya~ z+UCGMBKPCgFRr?UEXrcf*igP__Fa4M7;{jthH%YJ+@|tCi+f}sE{$S+7XcpB>Mugb z@scdSoCMGLQkW|Q8r5tbU)==~g{ZG`0M1FR`Ltc&Q(cp?r%c8RGzUn?hle7NV9crD z4wLDhlJ5e=_l4)O(oM{Dj6<+%KAZLfA8O@&f_NwND5L*KLGLB}PT@O~9#2qn=2$wC@d2ON z3uj+9MW~}KdZRi6*dEMPHGozt$pIeA3*D;od}Prc*1DpR4ndag<@)l@iEd{RM3s~k zw;D^duV7^$hD~miy`m#!A2%xVJ1lDe`FKNsaGA%OhDYv5jv^WnLX%ag)r)#*;`zHX zb_`#-n@Y=%Ixgt0VrTW1PJ39o6B+f~+*^dl$T0jU3cmP5J_xI-)%^^RyqV<}W6Gg_M%Q49b9obdDkiJ{ZJvk6BVcMPS# z&+-Am-XjRV#UVIhf~kS!V6n7ZN;IF_wa5ekQhEv4BE9%R52)#bph*?Wnuwj_riWET zC*2>2`VflMU>C}P6hENkZ#LFq51n){Hor`T89k$&{SE)0T3Au1ezi&9^1!}V^zI!&#oJ@%!cSRY#ZFmj*!8?2a4M|Mf z-nA!~bdK2{%i{r+UT!O(w*vb>%6^iFQl@}^1Y{|0liWQ-Q+`mVoyXD|RZ1zKX6GSA zHZP6tS#H^Ah=L8SoUnld^(!YsNlX=y z_((5-drtOiw43c0W5RL~AD&(N{&ssg8=oXuT_i;d0q;{AHbOd13w|r@v-J4a(lhMp z@H>gdB6XtvfKO57kDs}RZMQ^~EVWoUE#gCTMuIiyspCCrD}Pmx3T4$8gT)epJl)e^ z&m%TE3=MF8eADPb(T91xKn8?bI~|wArCcRPw$WnDg?V^6D88$EmKiGEY`X2*l} z{Cc-%_%41BDnL&=#c}P+vPQcgVg=CEE%L=IM&-%o*xR-c7{j(?upsg0FXL}qSoyXF zMjN$w=DUV}#m$oS5Bid5vxrzIBjP*M%ElHvncj$F&wtUoZ9=O@#6&%^-RBBYiw^X< zdBBczO@^HdTTOXEx6jE6(5+rx6~X-4X~LuWki-%dcO5sT;@>!`Y=@AytD-`mUP4xK ztD}vKaADD@7w9>@mLlzCvZwQ8BajH*3i_xdqCsaf{+WNpb{6!H7rU}b>lQzh)Sc9w zbrS_+iqNEP089Na#hY4sv!r(_z>0F6G`3tafKaQSv=QO0v+W!OjjnOt`zAjw+g~9$o-2=+C%CWE6N+aT);6 z`!&=Ce8aw5IG!C%I}JZp3w%Cge-u>z=r(+DpeT>N@!_EZs+P#4+^8dJX?H-%g@fWa zrKk$`ErQ{d#E2EW&pyaWXPRbUiFIbB|Z+Y7z zi&5=u_373>0T^!afAAr6bcEkG8hCI!-&3?+Hx5~x7ya>iBOqi$emKr3N!%u(EhF6q z?0@<6!VDBBH5mSG79;^vq82vM!5rPnf*;Z2CF8Be4^5bTUHD2+As&56)!f(hVD!;$cX-o{!MCSl# zz+OW9215R_s^ZOk5`CyQh+|A*sV+mRqY^x(G;h#FAT=!X;w%YOl>ScvNz^AX(wX=H zx8(k(=tyVj*JloP5>?P(1Hr0AB@#|!X9!BM+&a2pFs#FFU;Zhz#=^~$R`iqg|4uoX dF*E*(fEzq-B&Gg?_Du*1r!227S1V%{{y)_Jwn6{^ diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt deleted file mode 100644 index 7834b69..0000000 --- a/assets/sendmail_template.txt +++ /dev/null @@ -1,53 +0,0 @@ -To: $email -Subject: $subject -Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfcoremimeboundary" - ---nfcoremimeboundary -Content-Type: text/html; charset=utf-8 - -$email_html - ---nfcoremimeboundary -Content-Type: image/png;name="nf-core-proteogenomics_logo.png" -Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: inline; filename="nf-core-proteogenomics_logo.png" - -<% out << new File("$baseDir/assets/nf-core-proteogenomics_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> - -<% -if (mqcFile){ -def mqcFileObj = new File("$mqcFile") -if (mqcFileObj.length() < mqcMaxSize){ -out << """ ---nfcoremimeboundary -Content-Type: text/html; name=\"multiqc_report\" -Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" - -${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). 
- join( '\n' )} -""" -}} -%> - ---nfcoremimeboundary-- From 780a1156276673ae0b929613c43e7d275b26601c Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:13:51 -0500 Subject: [PATCH 06/36] Deleted nf schema json --- nextflow_schema.json | 259 ------------------------------------------- 1 file changed, 259 deletions(-) delete mode 100644 nextflow_schema.json diff --git a/nextflow_schema.json b/nextflow_schema.json deleted file mode 100644 index fcdcc24..0000000 --- a/nextflow_schema.json +++ /dev/null @@ -1,259 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/proteogenomics/master/nextflow_schema.json", - "title": "nf-core/proteogenomics pipeline parameters", - "description": "A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteog", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input" - ], - "properties": { - "input": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Input FastQ files.", - "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" - }, - "single_end": { - "type": "boolean", - "description": "Specifies that the input is single-end reads.", - "fa_icon": "fas fa-align-center", - "help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run." - }, - "outdir": { - "type": "string", - "description": "The output directory where the results will be saved.", - "default": "./results", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Options for the reference genome indices used to align reads.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. 
`--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." - }, - "fasta": { - "type": "string", - "fa_icon": "fas fa-font", - "description": "Path to FASTA genome file.", - "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." - }, - "igenomes_base": { - "type": "string", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes/", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "hidden": true, - "fa_icon": "fas fa-question-circle" - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "hidden": true, - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ] - }, - "name": { - "type": "string", - "description": "Workflow name.", - "fa_icon": "fas fa-fingerprint", - "hidden": true, - "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "hidden": true, - "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format", - "hidden": true, - "help_text": "Set to receive plain-text e-mails instead of HTML formatted." 
- }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true, - "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true, - "help_text": "Set to disable colourful command line output and live life in monochrome." - }, - "multiqc_config": { - "type": "string", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. 
This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", - "fa_icon": "fas fa-users-cog" - }, - "hostnames": { - "type": "string", - "description": "Institutional configs hostname.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/reference_genome_options" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - } - ] -} From 420340e8da6ba4cdccde50dfaff963513f639206 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:15:14 -0500 Subject: [PATCH 07/36] Removes redundant configs --- conf/base.config | 51 ------------------------------------------- conf/test_full.config | 22 ------------------- 2 files changed, 73 deletions(-) delete mode 100644 conf/base.config delete mode 100644 conf/test_full.config diff --git a/conf/base.config b/conf/base.config deleted file mode 100644 index c81f6bd..0000000 --- a/conf/base.config +++ /dev/null @@ -1,51 +0,0 @@ -/* - * ------------------------------------------------- - * nf-core/proteogenomics Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ - -process { - - // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' }
-  maxRetries = 1
-  maxErrors = '-1'
-
-  // Process-specific resource requirements
-  // NOTE - Only one of the labels below are used in the fastqc process in the main script.
-  // If possible, it would be nice to keep the same label naming convention when
-  // adding in your processes.
-  // TODO nf-core: Customise requirements for specific processes.
-  // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
-  withLabel:process_low {
-    cpus = { check_max( 2 * task.attempt, 'cpus' ) }
-    memory = { check_max( 14.GB * task.attempt, 'memory' ) }
-    time = { check_max( 6.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_medium {
-    cpus = { check_max( 6 * task.attempt, 'cpus' ) }
-    memory = { check_max( 42.GB * task.attempt, 'memory' ) }
-    time = { check_max( 8.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_high {
-    cpus = { check_max( 12 * task.attempt, 'cpus' ) }
-    memory = { check_max( 84.GB * task.attempt, 'memory' ) }
-    time = { check_max( 10.h * task.attempt, 'time' ) }
-  }
-  withLabel:process_long {
-    time = { check_max( 20.h * task.attempt, 'time' ) }
-  }
-  withName:get_software_versions {
-    cache = false
-  }
-
-}
diff --git a/conf/test_full.config b/conf/test_full.config
deleted file mode 100644
index 08642ae..0000000
--- a/conf/test_full.config
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * -------------------------------------------------
- * Nextflow config file for running full-size tests
- * -------------------------------------------------
- * Defines bundled input files and everything required
- * to run a full size pipeline test. Use as follows:
- *   nextflow run nf-core/proteogenomics -profile test_full,<docker/singularity>
- */
-
-params {
-  config_profile_name = 'Full test profile'
-  config_profile_description = 'Full test dataset to check pipeline function'
-
-  // Input data for full size test
-  // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-  // TODO nf-core: Give any required params for the test so that command line flags are not needed
-  single_end = false
-  input_paths = [
-    ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']],
-    ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']]
-  ]
-}
From 4495b0550c83026d12193e9df58f8524357de128 Mon Sep 17 00:00:00 2001
From: cgpu
Date: Sat, 7 Nov 2020 17:40:25 -0500
Subject: [PATCH 08/36] Updates README with template structure

---
 README.md          | 65 ++++++++++++++++++++++++++++++++++---
 README_nextflow.md | 69 ----------------------------------------
 2 files changed, 52 insertions(+), 82 deletions(-)
 delete mode 100644 README_nextflow.md

diff --git a/README.md b/README.md
index 7d9896b..43a6f33 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,65 @@
-# Delineation of the Human Proteome at Isoform Resolution Through Long-read Proteogenomics
+# sheynkman-lab/Long-Read-Proteogenomics
+
+**A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteogenomics**. 
[Cold Spring Harbor Laboratory Biological Data Science Codeathon](https://datascience.nih.gov/news/cold-spring-harbor-laboratory-biological-data-science-codeathon)

-# [Get started immediately with the data vignette!](https://github.com/sheynkman-lab/Long-Read-Proteogenomics/wiki/Pipeline-Vignette)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4256568.svg)](https://doi.org/10.5281/zenodo.4256568)

-Protein isoforms are the direct translational product of fully spliced mRNA molecules. Protein isoforms can be modified during or subsequent to translation with additional chemical moities (e.g. phosphorylation or acetylation) or they can be cleaved resulting in a [proteoform](https://www.nature.com/articles/nmeth.2369), which is the ultimate biological actor in many important biological processes. At a high level, protein isoforms can be predicted from genomic sequencing data and then observed by mass spectrometry. Despite impressive technological achievements in both realms (sequencing and mass spectrometry), many gaps exist in our ability to comprehensively identify all protein isoforms even for a single sample. Scientists ability to accomplish this goal depends on having detailed an accurate knowledge of all protein coding mRNA isoforms, comprehensive mass spectrometry data covering at least one unique region of each protein isoform, and a protein inference algorithm that can faithfully and accurately attribute observed peptides to the proper parent isoform. We provide below an overiew of the key remaining challenges and then provide for the first time a pipeline that solves these challenges.
+## Quick Start
+
+1. Make sure you have [`nextflow`](https://nf-co.re/usage/installation) and [`Docker`](https://docs.docker.com/engine/installation/) installed
+2. Start running your own analysis!
+
+   ```bash
+   nextflow run sheynkman-lab/Long-Read-Proteogenomics -profile test
+   ```
+
+See [usage docs](docs/usage.md) for all of the available options when running the pipeline.

-Protein Isoforms - Knowledge of a full-length transcriptome can provide for an empirically-derived predicted set of protein isoforms, which can serve as accurate and more precise models for protein inference. Third generation sequencing, exemplified recently by [Pac-Bio](https://www.pacb.com/) can, for the first time, shed light on full-length protein isoforms. Until now, protein isoforms were inferred through transript reconstruction on next generation sequencing data. However, this was a frought process with many errors. With the advent of long-read sequencing, we can observe full-length, fully-spliced mRNA transcripts that can be translated into protein sequencing for use in subsequent mass spectrometry experiments. A major remaining challenge is the identificaton of all open reading frames (ORFs).
+## Documentation

-Mass Spectrometry Data - Bottom-up mass spectrometry is the premeir method for protein identification. Mass-spectrometry, as as technology, provides a means to rapidly identify peptides produces by proteolytic digestion of intact proteins isoforms. It is fast and sensitive. Well done experiments frequently identify as many as 10,000 proteins in a single analysis. Yet, much can be done to improve the depth and accuracy of such experiments, especially for comprehensive identification of protein isoforms. First and foremost, the dominant choice of protease for bottom-up mass spectrometry is trypsin.
Trypsin digest whole proteins into managealbe peptides that are easily separated by HPLC and identified by mass spectrometry. However, identifiction of a protein isoform requires at minimum a single peptide that can be uniquely ascribed to that isoform. Here, trypsin alone simply cannot deliver enough unique peptides to identify all protein isoforms in a sample. The reason is that many tryptic peptides are too short or too long for effective mass-spec analysis. In addtion, many tryptic peptides are shared between many protein isoforms giving them litte informative value. Recently, [Miller](https://pubs.acs.org/doi/10.1021/acs.jproteome.9b00330) demonstrated that use of multiple proteases for a single sample, can greatly improve protein inference by significantly increasing the number of unique peptides detetected. Frequently, protein isoforms can have multiple unique peptides for added identification confidence. +The sheynkman-lab/Long-Read-Proteogenomics pipeline comes with documentation about the pipeline: [usage](docs/usage.md) and [output](docs/output.md). -Protein Isoform Inference - [Protein inference](https://www.sciencedirect.com/science/article/abs/pii/S187439191630344X?via%3Dihub) is the process of guessing which proteins are present in a sample based on limited peptide evidence. Bottom-up proteomics, by definition, deals only in peptides, which are the pieces of a protein available for analyis following digestion with a protease. Top-down proteomics would be the preferred method for protein isoform detections because it analyses intact proteoforms. However, at the present time, it lacks the sensitivty that bottom-up has, yielding only fractional proteome coverage. In bottom-up, a key challenge is taking all of the identified peptides and then attempting to use them to infer presence of the protein isoforms from which they were derived. This process is aided greatly by deeper coverage of peptides unique to each isoform in the sample. Still it is not a solved problem. Here, in this project, we will integrate protein isoform presence as measured by copy number from the Pac-Bio data as a Bayesian prior in the protein inference algorithm. +### Pipeline overview -Pipeline overview - A pipeline is provided here that can talk raw Pac-Bio data and assembly an accurate list of protein isoforms with high probability of existing in the sample. This database is then used in [MetaMorpheus](https://github.com/smith-chem-wisc/MetaMorpheus) to search raw mass spectrometry data against the Pac-Bio reference. MetaMorpheus will use protein isoform read counts during protein inference. Two other protein databases are employed for the purposes of comparison. One is from [UniProt](https://www.uniprot.org/) and the other is from [GENCODE](https://www.gencodegenes.org/). A Jupyter notebook performs all final comparisons and data analysis. +The pipeline accepts as input raw Pac-Bio data and performs the assembly of an accurate list of protein isoforms with high probability of existing in the sample. This database is then used in [MetaMorpheus](https://github.com/smith-chem-wisc/MetaMorpheus) to search raw mass spectrometry data against the Pac-Bio reference. MetaMorpheus will use protein isoform read counts during protein inference. Two other protein databases are employed for the purposes of comparison. One is from [UniProt](https://www.uniprot.org/) and the other is from [GENCODE](https://www.gencodegenes.org/). A Jupyter notebook performs all final comparisons and data analysis. 
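+
+As a minimal sketch of a release-pinned launch (the revision number shown is a placeholder; `--input` and `--outdir` follow the template defaults described in the [usage docs](docs/usage.md)):
+
+```bash
+# Pin an exact pipeline revision for reproducibility and run with Docker
+nextflow run sheynkman-lab/Long-Read-Proteogenomics \
+    -r 1.0.0 \
+    -profile docker \
+    --input '*_R{1,2}.fastq.gz' \
+    --outdir results
+```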
![pipeline](https://user-images.githubusercontent.com/16841846/98399434-fa4b8a00-2027-11eb-953b-edb440c7ff8e.png)

-## Authors
+## Background
+
+Protein isoforms are the direct translational product of fully spliced mRNA molecules. Protein isoforms can be modified during or subsequent to translation with additional chemical moieties (e.g. phosphorylation or acetylation) or they can be cleaved resulting in a [proteoform](https://www.nature.com/articles/nmeth.2369), which is the ultimate biological actor in many important biological processes. At a high level, protein isoforms can be predicted from genomic sequencing data and then observed by mass spectrometry. Despite impressive technological achievements in both realms (sequencing and mass spectrometry), many gaps exist in our ability to comprehensively identify all protein isoforms even for a single sample. Scientists' ability to accomplish this goal depends on having detailed and accurate knowledge of all protein-coding mRNA isoforms, comprehensive mass spectrometry data covering at least one unique region of each protein isoform, and a protein inference algorithm that can faithfully and accurately attribute observed peptides to the proper parent isoform. We provide below an overview of the key remaining challenges and then present, for the first time, a pipeline that solves these challenges.
+
+### Protein Isoforms
+
+Knowledge of a full-length transcriptome can provide for an empirically-derived predicted set of protein isoforms, which can serve as accurate and more precise models for protein inference. Third-generation sequencing, exemplified recently by [Pac-Bio](https://www.pacb.com/), can, for the first time, shed light on full-length protein isoforms. Until now, protein isoforms were inferred through transcript reconstruction on next-generation sequencing data. However, this was a fraught process with many errors. With the advent of long-read sequencing, we can observe full-length, fully-spliced mRNA transcripts that can be translated into protein sequences for use in subsequent mass spectrometry experiments. A major remaining challenge is the identification of all open reading frames (ORFs).
+
+### Mass Spectrometry Data
+
+Bottom-up mass spectrometry is the premier method for protein identification. Mass spectrometry, as a technology, provides a means to rapidly identify peptides produced by proteolytic digestion of intact protein isoforms. It is fast and sensitive. Well-done experiments frequently identify as many as 10,000 proteins in a single analysis. Yet, much can be done to improve the depth and accuracy of such experiments, especially for comprehensive identification of protein isoforms. First and foremost, the dominant choice of protease for bottom-up mass spectrometry is trypsin. Trypsin digests whole proteins into manageable peptides that are easily separated by HPLC and identified by mass spectrometry. However, identification of a protein isoform requires at minimum a single peptide that can be uniquely ascribed to that isoform. Here, trypsin alone simply cannot deliver enough unique peptides to identify all protein isoforms in a sample. The reason is that many tryptic peptides are too short or too long for effective mass-spec analysis. In addition, many tryptic peptides are shared between many protein isoforms giving them little informative value.
Recently, [Miller](https://pubs.acs.org/doi/10.1021/acs.jproteome.9b00330) demonstrated that use of multiple proteases for a single sample can greatly improve protein inference by significantly increasing the number of unique peptides detected. Frequently, protein isoforms can have multiple unique peptides for added identification confidence.
+
+### Protein Isoform Inference
+
+[Protein inference](https://www.sciencedirect.com/science/article/abs/pii/S187439191630344X?via%3Dihub) is the process of guessing which proteins are present in a sample based on limited peptide evidence. Bottom-up proteomics, by definition, deals only in peptides, which are the pieces of a protein available for analysis following digestion with a protease. Top-down proteomics would be the preferred method for protein isoform detection because it analyses intact proteoforms. However, at the present time, it lacks the sensitivity that bottom-up has, yielding only fractional proteome coverage. In bottom-up, a key challenge is taking all of the identified peptides and then attempting to use them to infer the presence of the protein isoforms from which they were derived. This process is aided greatly by deeper coverage of peptides unique to each isoform in the sample. Still, it is not a solved problem. Here, in this project, we will integrate protein isoform presence as measured by copy number from the Pac-Bio data as a Bayesian prior in the protein inference algorithm.
+
+## Contributors
+
+- Gloria Sheynkman
+- Michael Shortreed
+- Rachel M. Miller
+- Simran Kaur
+- [Anne Deslattes Mays](https://orcid.org/0000-0001-7951-3439)
+
+## Repository template
+
+This pipeline was generated using a modification of the nf-core template.
+You can cite the `nf-core` publication as follows:
-Gloria Sheynkman
-Michael Shortreed
-Rachel M. Miller
-Simran Kaur
-Anne Deslattes Mays, https://orcid.org/0000-0001-7951-3439
+
+> **The nf-core framework for community-curated bioinformatics pipelines.**
+>
+> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
+>
+> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
+> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ)
diff --git a/README_nextflow.md b/README_nextflow.md
deleted file mode 100644
index 1f88418..0000000
--- a/README_nextflow.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# ![nf-core/proteogenomics](docs/images/nf-core-proteogenomics_logo.png)
-
-**A workflow for delineating the Human Proteome at Isoform Resolution Through Long-read Proteog**.
- -[![GitHub Actions CI Status](https://github.com/nf-core/proteogenomics/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/proteogenomics/actions) -[![GitHub Actions Linting Status](https://github.com/nf-core/proteogenomics/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/proteogenomics/actions) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) - -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/proteogenomics.svg)](https://hub.docker.com/r/nfcore/proteogenomics) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteogenomics-4A154B?logo=slack)](https://nfcore.slack.com/channels/proteogenomics) - -## Introduction - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. - -## Quick Start - -1. Install [`nextflow`](https://nf-co.re/usage/installation) - -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ - -3. Download the pipeline and test it on a minimal dataset with a single command: - - ```bash - nextflow run nf-core/proteogenomics -profile test, - ``` - - > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - -4. Start running your own analysis! - - - - ```bash - nextflow run nf-core/proteogenomics -profile --input '*_R{1,2}.fastq.gz' --genome GRCh37 - ``` - -See [usage docs](https://nf-co.re/proteogenomics/usage) for all of the available options when running the pipeline. - -## Documentation - -The nf-core/proteogenomics pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/proteogenomics/usage) and [output](https://nf-co.re/proteogenomics/output). - - - -## Credits - -nf-core/proteogenomics was originally written by enomicsSheynkman Group and Smith Group. - -## Contributions and Support - -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). - -For further information or help, don't hesitate to get in touch on the [Slack `#proteogenomics` channel](https://nfcore.slack.com/channels/proteogenomics) (you can join with [this invite](https://nf-co.re/join/slack)). - -## Citation - - - - -You can cite the `nf-core` publication as follows: - -> **The nf-core framework for community-curated bioinformatics pipelines.** -> -> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. -> -> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 
-> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ) From 2a785f30ca3e080f530e3f0fab5e5c34c14bff1e Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:40:50 -0500 Subject: [PATCH 09/36] Updates docs/ --- docs/README.md | 6 ++---- docs/output.md | 18 +++++------------- docs/usage.md | 8 ++++---- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/docs/README.md b/docs/README.md index 4077e14..5e1ecb9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,10 +1,8 @@ -# nf-core/proteogenomics: Documentation +# sheynkman-lab/Long-Read-Proteogenomics: Documentation -The nf-core/proteogenomics documentation is split into the following pages: +The sheynkman-lab/Long-Read-Proteogenomics documentation is split into the following pages: * [Usage](usage.md) * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. * [Output](output.md) * An overview of the different results produced by the pipeline and how to interpret them. - -You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 1342ddc..f45591e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,29 +1,21 @@ -# nf-core/proteogenomics: Output - -## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteogenomics/output](https://nf-co.re/proteogenomics/output) - -> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ +# sheynkman-lab/Long-Read-Proteogenomics: Output ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -* [FastQC](#fastqc) - Read quality control -* [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline -* [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +* [validate](#validate) - Test that the pipeline template runs without errors -## FastQC +## Validate -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. +[Validate](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). 
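+
+As a rough sketch of where the execution reports land (assuming the default results directory and the `pipeline_info` paths set in `nextflow.config`):
+
+```bash
+# After a finished run, the trace files should sit under pipeline_info/ (sketch)
+ls results/pipeline_info/
+# execution_report.html  execution_timeline.html
+# execution_trace.txt    pipeline_dag.svg
+```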
diff --git a/docs/usage.md b/docs/usage.md index 34552e1..6c9ed9b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,4 +1,4 @@ -# nf-core/proteogenomics: Usage +# sheynkman-lab/Long-Read-Proteogenomics: Usage ## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteogenomics/usage](https://nf-co.re/proteogenomics/usage) @@ -13,7 +13,7 @@ The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker +nextflow run sheynkman-lab/Long-Read-Proteogenomics --input '*_R{1,2}.fastq.gz' -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -32,14 +32,14 @@ results # Finished results (configurable, see below) When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash -nextflow pull nf-core/proteogenomics +nextflow pull sheynkman-lab/Long-Read-Proteogenomics ``` ### Reproducibility It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/proteogenomics releases page](https://github.com/nf-core/proteogenomics/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [sheynkman-lab/Long-Read-Proteogenomics releases page](https://github.com/sheynkman-lab/Long-Read-Proteogenomics/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. From 75571d37f43da626a05c145ea115a5ea0942269f Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 17:41:14 -0500 Subject: [PATCH 10/36] Updates repo name in changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0229b88..31fd441 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,11 @@ -# nf-core/proteogenomics: Changelog +# sheynkman-lab/Long-Read-Proteogenomics: Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## v1.0dev - [date] -Initial release of nf-core/proteogenomics, created with the [nf-core](https://nf-co.re/) template. +Initial release of sheynkman-lab/Long-Read-Proteogenomics, created with the [nf-core](https://nf-co.re/) template. 
### `Added`

### `Fixed`

### `Dependencies`

### `Deprecated`

From 492d1ae1055e9cff5390a20c4bad9c5a9ca5ca4c Mon Sep 17 00:00:00 2001
From: cgpu
Date: Sat, 7 Nov 2020 17:42:22 -0500
Subject: [PATCH 11/36] Updates template test.config

---
 conf/test.config | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 500a79c..76edbf7 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -4,7 +4,7 @@
  * -------------------------------------------------
  * Defines bundled input files and everything required
  * to run a fast and simple test. Use as follows:
- *   nextflow run nf-core/proteogenomics -profile test,
+ *   nextflow run sheynkman-lab/Long-Read-Proteogenomics -profile test,
  */

 params {
@@ -16,11 +16,4 @@
   max_time = 48.h

   // Input data
-  // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-  // TODO nf-core: Give any required params for the test so that command line flags are not needed
-  single_end = false
-  input_paths = [
-    ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']],
-    ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']]
-  ]
 }

From da3687dea2fcdf5165dd571cdd2881384c776a99 Mon Sep 17 00:00:00 2001
From: cgpu
Date: Sat, 7 Nov 2020 18:04:14 -0500
Subject: [PATCH 12/36] Adds bin folder and template wrapper R script

---
 bin/template_wrapper.R | 73 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100755 bin/template_wrapper.R

diff --git a/bin/template_wrapper.R b/bin/template_wrapper.R
new file mode 100755
index 0000000..9d588f2
--- /dev/null
+++ b/bin/template_wrapper.R
@@ -0,0 +1,73 @@
+#!/usr/bin/env Rscript
+
+############################## ARGUMENTS SECTION #############################
+## Collect arguments
+args <- commandArgs(TRUE)
+
+## Default setting when not all arguments are passed or help is needed
+if("--help" %in% args | "help" %in% args | (length(args) == 0) | (length(args) == 1) ) {
+  cat("
+      The helper R Script template_wrapper.R
+      Mandatory arguments:
+        --input=path            - The path to the input file
+
+        --help                  - you are reading it
+
+      Optional arguments:
+
+        --a_number=num          - A numeric value to check parameters of type numeric
+                                  Default: 12
+        --a_name=chr            - A name for checking parameters of type character
+                                  Default: 'this_name'
+
+      Usage:
+
+          The typical command for running the script is as follows:
+
+          ./template_wrapper.R --input='example.txt' --a_number=8
+
+          To see help:
+          ./template_wrapper.R --help
+
+      WARNING : here put all the things the user has to know
+      \n")
+
+  q(save="no")
+}
+
+## Parse arguments (we expect the form --arg=value)
+parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+
+argsL <- as.list(as.character(as.data.frame(do.call("rbind", parseArgs(args)))$V2))
+names(argsL) <- as.data.frame(do.call("rbind", parseArgs(args)))$V1
+args <- argsL
+rm(argsL)
+
+## Give some value to optional arguments if not provided
+if(is.null(args$a_number)) {args$a_number = 12} else {args$a_number=as.numeric(args$a_number)}
+if(is.null(args$a_name)) {args$a_name = "this_name"} else {args$a_name=as.character(args$a_name)}
+
+############################## LIBRARIES SECTION #############################
+
+suppressWarnings(suppressMessages(library(stats)))
+
+# 
######################### VARIABLES REASSIGNMENT SECTION ############################### + +# Facilitates testing and protects from wh-spaces, irregular chars + +# required +input <- args$input + +# optional +a_number <- args$a_number +a_name <- args$a_name + +cat("\n") +cat("ARGUMENTS SUMMARY") +cat("\n") +cat("input : ", input, "\n",sep="") +cat("a_number : ", a_number, "\n",sep="") +cat("a_name : ", a_name, "\n",sep="") + +# ############################### SCRIPT SECTION ############################### + From 35de804ccbb441ba2c07a8fcec26ae7ef50cefc1 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:04:35 -0500 Subject: [PATCH 13/36] Adds pbccs in env.yml --- environment.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index bcbcb35..d958eec 100644 --- a/environment.yml +++ b/environment.yml @@ -10,6 +10,7 @@ dependencies: - conda-forge::markdown=3.1.1 - conda-forge::pymdown-extensions=6.0 - conda-forge::pygments=2.5.2 - # TODO nf-core: Add required software dependencies here - bioconda::fastqc=0.11.8 - bioconda::multiqc=1.7 + - pbccs + - From 5638c548c6ee8725a681101c22adbb5f3ac531a6 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:08:10 -0500 Subject: [PATCH 14/36] Changes the location of pipeline info, logs --- nextflow.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 4534b54..0ffd62d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -52,19 +52,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] timeline { enabled = true - file = "${params.tracedir}/execution_timeline.html" + file = "${params.outdir}/pipeline_info/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/execution_report.html" + file = "${params.outdir}/pipeline_info/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace.txt" + file = "${params.outdir}/pipeline_info/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + file = "${params.outdir}/pipeline_info/pipeline_dag.svg" } manifest { From 8fdecfde877c418a3b714e78691a07914605c58f Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 23:10:11 +0000 Subject: [PATCH 15/36] Adds .github folder --- .github/.dockstore.yml | 5 ++ .github/CONTRIBUTING.md | 57 +++++++++++++++++++ .github/ISSUE_TEMPLATE/bug_report.md | 45 +++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 26 +++++++++ .github/PULL_REQUEST_TEMPLATE.md | 20 +++++++ .github/markdownlint.yml | 5 ++ .github/workflows/awsfulltest.yml | 42 ++++++++++++++ .github/workflows/awstest.yml | 39 +++++++++++++ .github/workflows/branch.yml | 37 ++++++++++++ .github/workflows/ci.yml | 55 ++++++++++++++++++ .github/workflows/linting.yml | 68 +++++++++++++++++++++++ .github/workflows/push_dockerhub.yml | 40 +++++++++++++ 12 files changed, 439 insertions(+) create mode 100644 .github/.dockstore.yml create mode 100644 .github/CONTRIBUTING.md create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/markdownlint.yml create mode 100644 .github/workflows/awsfulltest.yml create mode 100644 .github/workflows/awstest.yml create mode 100644 .github/workflows/branch.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/linting.yml create mode 100644 .github/workflows/push_dockerhub.yml diff --git 
a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 0000000..030138a --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,5 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..e434cdf --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,57 @@ +# nf-core/proteogenomics: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving nf-core/proteogenomics. + +We try to manage the required tasks for nf-core/proteogenomics using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +> If you need help using or modifying nf-core/proteogenomics then the best place to ask is on the nf-core Slack [#proteogenomics](https://nfcore.slack.com/channels/proteogenomics) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Contribution workflow + +If you'd like to write some code for nf-core/proteogenomics, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [nf-core/proteogenomics issues](https://github.com/nf-core/proteogenomics/issues) to avoid duplicating work + * If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteogenomics repository](https://github.com/nf-core/proteogenomics) to your GitHub account +3. Make the necessary changes / additions within your forked repository +4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint Tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline Tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. 
+ +* On your own fork, make a new branch `patch` based on `upstream/master`. +* Fix the bug, and bump version (X.Y.Z+1). +* A PR should be made on `master` from patch to directly this particular bug. + +## Getting help + +For further information/help, please consult the [nf-core/proteogenomics documentation](https://nf-co.re/proteogenomics/usage) and don't hesitate to get in touch on the nf-core Slack [#proteogenomics](https://nfcore.slack.com/channels/proteogenomics) channel ([join our Slack here](https://nf-co.re/join/slack)). diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..83ab441 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,45 @@ + + +## Description of the bug + + + +## Steps to reproduce + +Steps to reproduce the behaviour: + +1. Command line: +2. See error: + +## Expected behaviour + + + +## System + +- Hardware: +- Executor: +- OS: +- Version + +## Nextflow Installation + +- Version: + +## Container engine + +- Engine: +- version: +- Image tag: + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..d5ccdce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,26 @@ + + +## Is your feature request related to a problem? Please describe + + + + + +## Describe the solution you'd like + + + +## Describe alternatives you've considered + + + +## Additional context + + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..a88e15b --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,20 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason) +- [ ] `CHANGELOG.md` is updated +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] Documentation in `docs` is updated +- [ ] If necessary, also make a PR on the [nf-core/proteogenomics branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/proteogenomics) diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml new file mode 100644 index 0000000..96b12a7 --- /dev/null +++ b/.github/markdownlint.yml @@ -0,0 +1,5 @@ +# Markdownlint configuration file +default: true, +line-length: false +no-duplicate-header: + siblings_only: true diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 0000000..c4b991a --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,42 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with GitHub actions workflow dispatch. 
+# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: + +jobs: + run-awstest: + name: Run AWS full tests + if: github.repository == 'nf-core/proteogenomics' + runs-on: ubuntu-latest + steps: + - name: Setup Miniconda + uses: goanpeca/setup-miniconda@v1.0.2 + with: + auto-update-conda: true + python-version: 3.7 + - name: Install awscli + run: conda install -c conda-forge awscli + - name: Start AWS batch job + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters + # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} + AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} + AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} + AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} + run: | + aws batch submit-job \ + --region eu-west-1 \ + --job-name nf-core-proteogenomics \ + --job-queue $AWS_JOB_QUEUE \ + --job-definition $AWS_JOB_DEFINITION \ + --container-overrides '{"command": ["nf-core/proteogenomics", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/proteogenomics/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/proteogenomics/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 0000000..29d1bc7 --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,39 @@ +name: nf-core AWS test +# This workflow is triggered on push to the master branch. +# It can be additionally triggered manually with GitHub actions workflow dispatch. +# It runs the -profile 'test' on AWS batch. 
+ +on: + workflow_dispatch: + +jobs: + run-awstest: + name: Run AWS tests + if: github.repository == 'nf-core/proteogenomics' + runs-on: ubuntu-latest + steps: + - name: Setup Miniconda + uses: goanpeca/setup-miniconda@v1.0.2 + with: + auto-update-conda: true + python-version: 3.7 + - name: Install awscli + run: conda install -c conda-forge awscli + - name: Start AWS batch job + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} + AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} + AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} + AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} + run: | + aws batch submit-job \ + --region eu-west-1 \ + --job-name nf-core-proteogenomics \ + --job-queue $AWS_JOB_QUEUE \ + --job-definition $AWS_JOB_DEFINITION \ + --container-overrides '{"command": ["nf-core/proteogenomics", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/proteogenomics/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/proteogenomics/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 0000000..c8cccee --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,37 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/proteogenomics' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name}} == nf-core/proteogenomics ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the ${{github.event.pull_request.head.repo.full_name}} `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f0d227f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,55 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + test: + name: Run workflow tests + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteogenomics') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: ['19.10.0', ''] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . -t nfcore/proteogenomics:dev + + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} + run: | + docker pull nfcore/proteogenomics:dev + docker tag nfcore/proteogenomics:dev nfcore/proteogenomics:dev + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..8e8d5bb --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,68 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. 
+# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +on: + push: + pull_request: + release: + types: [published] + +jobs: + Markdown: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '10' + - name: Install markdownlint + run: npm install -g markdownlint-cli + - name: Run Markdownlint + run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + YAML: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-node@v1 + with: + node-version: '10' + - name: Install yaml-lint + run: npm install -g yaml-lint + - name: Run yaml-lint + run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") + nf-core: + runs-on: ubuntu-latest + steps: + + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - uses: actions/setup-python@v1 + with: + python-version: '3.6' + architecture: 'x64' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: linting-log-file + path: lint_log.txt + diff --git a/.github/workflows/push_dockerhub.yml b/.github/workflows/push_dockerhub.yml new file mode 100644 index 0000000..f8831a9 --- /dev/null +++ b/.github/workflows/push_dockerhub.yml @@ -0,0 +1,40 @@ +name: nf-core Docker push +# This builds the docker image and pushes it to DockerHub +# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) +on: + push: + branches: + - dev + release: + types: [published] + +jobs: + push_dockerhub: + name: Push new Docker image to Docker Hub + runs-on: ubuntu-latest + # Only run for the nf-core repo, for releases and merged PRs + if: ${{ github.repository == 'nf-core/proteogenomics' }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Build new docker image + run: docker build --no-cache . 
-t nfcore/proteogenomics:latest + + - name: Push Docker image to DockerHub (dev) + if: ${{ github.event_name == 'push' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker tag nfcore/proteogenomics:latest nfcore/proteogenomics:dev + docker push nfcore/proteogenomics:dev + + - name: Push Docker image to DockerHub (release) + if: ${{ github.event_name == 'release' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker push nfcore/proteogenomics:latest + docker tag nfcore/proteogenomics:latest nfcore/proteogenomics:${{ github.event.release.tag_name }} + docker push nfcore/proteogenomics:${{ github.event.release.tag_name }} From 6fcae1c41afb383ddb19790e46c669f355fd45d4 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:12:20 -0500 Subject: [PATCH 16/36] Removes redendant files from GH actions --- .github/.dockstore.yml | 5 ----- .github/markdownlint.yml | 5 ----- 2 files changed, 10 deletions(-) delete mode 100644 .github/.dockstore.yml delete mode 100644 .github/markdownlint.yml diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml deleted file mode 100644 index 030138a..0000000 --- a/.github/.dockstore.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Dockstore config version, not pipeline version -version: 1.2 -workflows: - - subclass: nfl - primaryDescriptorPath: /nextflow.config diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index 96b12a7..0000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-duplicate-header: - siblings_only: true From 2ae171995960cc46d1c0fb3b181aeb9e31520fce Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:19:41 -0500 Subject: [PATCH 17/36] Updates CONTRIBUTING.md --- .github/CONTRIBUTING.md | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index e434cdf..5bd24a3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,23 +1,23 @@ -# nf-core/proteogenomics: Contributing Guidelines +# sheynkman-lab/Long-Read-Proteogenomics: Contributing Guidelines Hi there! -Many thanks for taking an interest in improving nf-core/proteogenomics. +Many thanks for taking an interest in improving sheynkman-lab/Long-Read-Proteogenomics. -We try to manage the required tasks for nf-core/proteogenomics using GitHub issues, you probably came to this page when creating one. +We try to manage the required tasks for sheynkman-lab/Long-Read-Proteogenomics using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/proteogenomics then the best place to ask is on the nf-core Slack [#proteogenomics](https://nfcore.slack.com/channels/proteogenomics) channel ([join our Slack here](https://nf-co.re/join/slack)). +> If you need help using or modifying sheynkman-lab/Long-Read-Proteogenomics then the best place to ask is on the nf-core Slack [#proteogenomics](https://nfcore.slack.com/channels/proteogenomics) channel ([join our Slack here](https://nf-co.re/join/slack)). 
## Contribution workflow

-If you'd like to write some code for nf-core/proteogenomics, the standard workflow is as follows:
+If you'd like to write some code for sheynkman-lab/Long-Read-Proteogenomics, the standard workflow is as follows:

-1. Check that there isn't already an issue about your idea in the [nf-core/proteogenomics issues](https://github.com/nf-core/proteogenomics/issues) to avoid duplicating work
+1. Check that there isn't already an issue about your idea in the [sheynkman-lab/Long-Read-Proteogenomics issues](https://github.com/sheynkman-lab/Long-Read-Proteogenomics/issues) to avoid duplicating work
    * If there isn't one already, please create one so that others know you're working on this
-2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteogenomics repository](https://github.com/nf-core/proteogenomics) to your GitHub account
+2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [sheynkman-lab/Long-Read-Proteogenomics repository](https://github.com/sheynkman-lab/Long-Read-Proteogenomics) to your GitHub account
 3. Make the necessary changes / additions within your forked repository
 4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged

@@ -28,13 +28,6 @@ If you're not used to this workflow with git, you can start with some [docs from
 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

-There are typically two types of tests that run:
-
-### Lint Tests
-
-`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
-To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command.
-
 If any failures or warnings are encountered, please follow the listed URL for more documentation.

 ### Pipeline Tests

@@ -50,8 +43,4 @@ These tests are run both with the latest available version of `Nextflow` and als
 * On your own fork, make a new branch `patch` based on `upstream/master`.
 * Fix the bug, and bump version (X.Y.Z+1).
-* A PR should be made on `master` from patch to directly this particular bug.
-
-## Getting help
-
-For further information/help, please consult the [nf-core/proteogenomics documentation](https://nf-co.re/proteogenomics/usage) and don't hesitate to get in touch on the nf-core Slack [#proteogenomics](https://nfcore.slack.com/channels/proteogenomics) channel ([join our Slack here](https://nf-co.re/join/slack)).
+* A PR should be made on `master` from `patch` to fix this particular bug directly.
\ No newline at end of file From dccbaadd9c863385f4c76dace2b6446c03ffdc12 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:20:53 -0500 Subject: [PATCH 18/36] Updates ISSUE_TEMPLATE --- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index d5ccdce..ffd7287 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,5 +1,5 @@ ## PR checklist @@ -17,4 +15,3 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/prot - [ ] `CHANGELOG.md` is updated - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] Documentation in `docs` is updated -- [ ] If necessary, also make a PR on the [nf-core/proteogenomics branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/proteogenomics) From 5fb8e1bb82f98d973e42cd8520eb06d2f5ee4593 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:22:57 -0500 Subject: [PATCH 20/36] Removes AWS tests --- .github/workflows/awsfulltest.yml | 42 ------------------------------- .github/workflows/awstest.yml | 39 ---------------------------- 2 files changed, 81 deletions(-) delete mode 100644 .github/workflows/awsfulltest.yml delete mode 100644 .github/workflows/awstest.yml diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml deleted file mode 100644 index c4b991a..0000000 --- a/.github/workflows/awsfulltest.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: nf-core AWS full size tests -# This workflow is triggered on published releases. -# It can be additionally triggered manually with GitHub actions workflow dispatch. 
-# It runs the -profile 'test_full' on AWS batch - -on: - release: - types: [published] - workflow_dispatch: - -jobs: - run-awstest: - name: Run AWS full tests - if: github.repository == 'nf-core/proteogenomics' - runs-on: ubuntu-latest - steps: - - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 - with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters - # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} - AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} - AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - run: | - aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-proteogenomics \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition $AWS_JOB_DEFINITION \ - --container-overrides '{"command": ["nf-core/proteogenomics", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/proteogenomics/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/proteogenomics/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml deleted file mode 100644 index 29d1bc7..0000000 --- a/.github/workflows/awstest.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: nf-core AWS test -# This workflow is triggered on push to the master branch. -# It can be additionally triggered manually with GitHub actions workflow dispatch. -# It runs the -profile 'test' on AWS batch. 
- -on: - workflow_dispatch: - -jobs: - run-awstest: - name: Run AWS tests - if: github.repository == 'nf-core/proteogenomics' - runs-on: ubuntu-latest - steps: - - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 - with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} - AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} - AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - run: | - aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-proteogenomics \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition $AWS_JOB_DEFINITION \ - --container-overrides '{"command": ["nf-core/proteogenomics", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/proteogenomics/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/proteogenomics/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' From d7da7073200b39a4fe1068764025aaf0dc973051 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:30:56 -0500 Subject: [PATCH 21/36] Adds misspelling test --- .github/workflows/branch.yml | 4 +-- .github/workflows/catch_typos.yml | 17 ++++++++++++ .github/workflows/ci.yml | 28 ++----------------- .github/workflows/push_dockerhub.yml | 40 ---------------------------- 4 files changed, 21 insertions(+), 68 deletions(-) create mode 100644 .github/workflows/catch_typos.yml delete mode 100644 .github/workflows/push_dockerhub.yml diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index c8cccee..f206f16 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -11,9 +11,9 @@ jobs: steps: # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs - if: github.repository == 'nf-core/proteogenomics' + if: github.repository == 'sheynkman-lab/Long-Read-Proteogenomics' run: | - { [[ ${{github.event.pull_request.head.repo.full_name}} == nf-core/proteogenomics ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name}} == sheynkman-lab/Long-Read-Proteogenomics ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure diff --git a/.github/workflows/catch_typos.yml b/.github/workflows/catch_typos.yml new file mode 100644 index 0000000..c8ea7f5 --- /dev/null +++ b/.github/workflows/catch_typos.yml @@ -0,0 +1,17 @@ +# From here: https://github.com/reviewdog/action-misspell +name: reviewdog misspell +on: [pull_request] +jobs: + misspell: + name: runner / misspell + runs-on: ubuntu-latest + steps: + - name: Check out code. 
+ uses: actions/checkout@v2 + - name: misspell + uses: reviewdog/action-misspell@master + with: + github_token: ${{ secrets.github_token }} + locale: "US" + reporter: github-pr-review + level: error \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0d227f..1261f62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: nf-core CI +name: Testing for Long Reads Proteogenomics # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: @@ -11,8 +11,6 @@ on: jobs: test: name: Run workflow tests - # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteogenomics') }} runs-on: ubuntu-latest env: NXF_VER: ${{ matrix.nxf_ver }} @@ -24,32 +22,10 @@ jobs: steps: - name: Check out pipeline code uses: actions/checkout@v2 - - - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v1 - with: - PREFIX_FILTER: | - Dockerfile - environment.yml - - - name: Build new docker image - if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/proteogenomics:dev - - - name: Pull docker image - if: ${{ !env.GIT_DIFF }} - run: | - docker pull nfcore/proteogenomics:dev - docker tag nfcore/proteogenomics:dev nfcore/proteogenomics:dev - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + nextflow run ${GITHUB_WORKSPACE} -profile test diff --git a/.github/workflows/push_dockerhub.yml b/.github/workflows/push_dockerhub.yml deleted file mode 100644 index f8831a9..0000000 --- a/.github/workflows/push_dockerhub.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: nf-core Docker push -# This builds the docker image and pushes it to DockerHub -# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) -on: - push: - branches: - - dev - release: - types: [published] - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub - runs-on: ubuntu-latest - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/proteogenomics' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . 
-t nfcore/proteogenomics:latest - - - name: Push Docker image to DockerHub (dev) - if: ${{ github.event_name == 'push' }} - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker tag nfcore/proteogenomics:latest nfcore/proteogenomics:dev - docker push nfcore/proteogenomics:dev - - - name: Push Docker image to DockerHub (release) - if: ${{ github.event_name == 'release' }} - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/proteogenomics:latest - docker tag nfcore/proteogenomics:latest nfcore/proteogenomics:${{ github.event.release.tag_name }} - docker push nfcore/proteogenomics:${{ github.event.release.tag_name }} From 91ea8d99258edd61f3ca5fec6d06ad674ec3e2d7 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:42:25 -0500 Subject: [PATCH 22/36] Removes linting.yml --- .github/workflows/linting.yml | 68 ----------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 .github/workflows/linting.yml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml deleted file mode 100644 index 8e8d5bb..0000000 --- a/.github/workflows/linting.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: nf-core linting -# This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines -on: - push: - pull_request: - release: - types: [published] - -jobs: - Markdown: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml - YAML: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") - nf-core: - runs-on: ubuntu-latest - steps: - - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - uses: actions/setup-python@v1 - with: - python-version: '3.6' - architecture: 'x64' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install nf-core - - - name: Run nf-core lint - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} - - - name: Upload linting log file artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: linting-log-file - path: lint_log.txt - From 2ae3e35c5a7059576c22ead68c4b7f2a82a2c546 Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Sat, 7 Nov 2020 23:50:41 +0000 Subject: [PATCH 23/36] Corrects typo --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 83ab441..47b4f67 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -14,7 +14,7 @@ Please delete this text and anything that's not relevant from the template below ## Steps to 
reproduce -Steps to reproduce the behaviour: +Steps to reproduce the behavior: 1. Command line: 2. See error: From 10f556ecf0e72a5155f3bedbd9e840f0436bc8d8 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 18:57:00 -0500 Subject: [PATCH 24/36] Removes igenomes config --- conf/igenomes.config | 421 ------------------------------------------- 1 file changed, 421 deletions(-) delete mode 100644 conf/igenomes.config diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index caeafce..0000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,421 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" 
- gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" - 
bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} From 31744bb8ab3dcccb29c491209a5ad38e50c5c1dd Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Sun, 8 Nov 2020 00:03:36 +0000 Subject: [PATCH 25/36] Fixes typos caught by review-dog --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- Dockerfile | 4 ++-- README.md | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 47b4f67..56bbddf 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -19,7 +19,7 @@ Steps to reproduce the behavior: 1. Command line: 2. See error: -## Expected behaviour +## Expected behavior diff --git a/Dockerfile b/Dockerfile index 9908de1..0d5cccf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,8 +22,8 @@ RUN conda env export --name proteogenomics-base > proteogenomics-base.yml RUN touch .Rprofile RUN touch .Renviron -# Copy additonal scripts from bin and add to PATH +# Copy additional scripts from bin and add to PATH RUN mkdir /opt/bin COPY bin/* /opt/bin/ RUN chmod +x /opt/bin/* -ENV PATH="$PATH:/opt/bin/" \ No newline at end of file +ENV PATH="$PATH:/opt/bin/" diff --git a/README.md b/README.md index 43a6f33..aa6ef4e 100644 --- a/README.md +++ b/README.md @@ -34,15 +34,15 @@ Protein isoforms are the direct translational product of fully spliced mRNA mole ### Protein Isoforms -Knowledge of a full-length transcriptome can provide for an empirically-derived predicted set of protein isoforms, which can serve as accurate and more precise models for protein inference. Third generation sequencing, exemplified recently by [Pac-Bio](https://www.pacb.com/) can, for the first time, shed light on full-length protein isoforms. Until now, protein isoforms were inferred through transript reconstruction on next generation sequencing data. However, this was a frought process with many errors. With the advent of long-read sequencing, we can observe full-length, fully-spliced mRNA transcripts that can be translated into protein sequencing for use in subsequent mass spectrometry experiments. A major remaining challenge is the identificaton of all open reading frames (ORFs). +Knowledge of a full-length transcriptome can provide for an empirically-derived predicted set of protein isoforms, which can serve as accurate and more precise models for protein inference. Third generation sequencing, exemplified recently by [Pac-Bio](https://www.pacb.com/) can, for the first time, shed light on full-length protein isoforms. Until now, protein isoforms were inferred through transript reconstruction on next generation sequencing data. However, this was a frought process with many errors. With the advent of long-read sequencing, we can observe full-length, fully-spliced mRNA transcripts that can be translated into protein sequencing for use in subsequent mass spectrometry experiments. 
A major remaining challenge is the identification of all open reading frames (ORFs).

### Mass Spectrometry Data

-Bottom-up mass spectrometry is the premeir method for protein identification. Mass-spectrometry, as as technology, provides a means to rapidly identify peptides produces by proteolytic digestion of intact proteins isoforms. It is fast and sensitive. Well done experiments frequently identify as many as 10,000 proteins in a single analysis. Yet, much can be done to improve the depth and accuracy of such experiments, especially for comprehensive identification of protein isoforms. First and foremost, the dominant choice of protease for bottom-up mass spectrometry is trypsin. Trypsin digest whole proteins into managealbe peptides that are easily separated by HPLC and identified by mass spectrometry. However, identifiction of a protein isoform requires at minimum a single peptide that can be uniquely ascribed to that isoform. Here, trypsin alone simply cannot deliver enough unique peptides to identify all protein isoforms in a sample. The reason is that many tryptic peptides are too short or too long for effective mass-spec analysis. In addtion, many tryptic peptides are shared between many protein isoforms giving them litte informative value. Recently, [Miller](https://pubs.acs.org/doi/10.1021/acs.jproteome.9b00330) demonstrated that use of multiple proteases for a single sample, can greatly improve protein inference by significantly increasing the number of unique peptides detetected. Frequently, protein isoforms can have multiple unique peptides for added identification confidence.
+Bottom-up mass spectrometry is the premier method for protein identification. Mass spectrometry, as a technology, provides a means to rapidly identify peptides produced by proteolytic digestion of intact protein isoforms. It is fast and sensitive. Well-executed experiments frequently identify as many as 10,000 proteins in a single analysis. Yet, much can be done to improve the depth and accuracy of such experiments, especially for comprehensive identification of protein isoforms. First and foremost, the dominant choice of protease for bottom-up mass spectrometry is trypsin. Trypsin digests whole proteins into manageable peptides that are easily separated by HPLC and identified by mass spectrometry. However, identification of a protein isoform requires at minimum a single peptide that can be uniquely ascribed to that isoform. Here, trypsin alone simply cannot deliver enough unique peptides to identify all protein isoforms in a sample. The reason is that many tryptic peptides are too short or too long for effective mass-spec analysis. In addition, many tryptic peptides are shared between many protein isoforms, giving them little informative value. Recently, [Miller](https://pubs.acs.org/doi/10.1021/acs.jproteome.9b00330) demonstrated that the use of multiple proteases for a single sample can greatly improve protein inference by significantly increasing the number of unique peptides detected. Frequently, protein isoforms can have multiple unique peptides for added identification confidence.

### Protein Isoform Inference

-[Protein inference](https://www.sciencedirect.com/science/article/abs/pii/S187439191630344X?via%3Dihub) is the process of guessing which proteins are present in a sample based on limited peptide evidence. Bottom-up proteomics, by definition, deals only in peptides, which are the pieces of a protein available for analyis following digestion with a protease. Top-down proteomics would be the preferred method for protein isoform detections because it analyses intact proteoforms. However, at the present time, it lacks the sensitivty that bottom-up has, yielding only fractional proteome coverage. In bottom-up, a key challenge is taking all of the identified peptides and then attempting to use them to infer presence of the protein isoforms from which they were derived. This process is aided greatly by deeper coverage of peptides unique to each isoform in the sample. Still it is not a solved problem. Here, in this project, we will integrate protein isoform presence as measured by copy number from the Pac-Bio data as a Bayesian prior in the protein inference algorithm.
+[Protein inference](https://www.sciencedirect.com/science/article/abs/pii/S187439191630344X?via%3Dihub) is the process of inferring which proteins are present in a sample based on limited peptide evidence. Bottom-up proteomics, by definition, deals only in peptides, which are the pieces of a protein available for analysis following digestion with a protease. Top-down proteomics would be the preferred method for protein isoform detection because it analyses intact proteoforms. However, at the present time, it lacks the sensitivity that bottom-up has, yielding only fractional proteome coverage. In bottom-up, a key challenge is taking all of the identified peptides and then attempting to use them to infer the presence of the protein isoforms from which they were derived. This process is aided greatly by deeper coverage of peptides unique to each isoform in the sample. Still, it is not a solved problem. Here, in this project, we will integrate protein isoform presence as measured by copy number from the Pac-Bio data as a Bayesian prior in the protein inference algorithm.
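To make that last sentence concrete, one way such a prior could enter the inference (a sketch of the intent only; no such formula is defined anywhere in this pipeline) is:

    % Illustration: full-length transcript abundance as a prior over isoforms.
    \[
      P(\text{isoform } i \mid \text{peptides})
        \;\propto\;
      P(\text{peptides} \mid \text{isoform } i)\,\pi_i,
      \qquad
      \pi_i = \frac{c_i}{\sum_j c_j}
    \]
    % where $c_i$ is the full-length (CCS) read count observed for isoform $i$
    % in the Pac-Bio data.

Isoforms with no long-read support would then presumably receive a small pseudocount, so that they are down-weighted rather than excluded outright.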
## Contributors From 2388635421c351feb87fd895d546620c16704f02 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sat, 7 Nov 2020 19:08:38 -0500 Subject: [PATCH 26/36] Adds tentative LICENSE --- LICENSE_nextflow => LICENSE | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename LICENSE_nextflow => LICENSE (100%) diff --git a/LICENSE_nextflow b/LICENSE similarity index 100% rename from LICENSE_nextflow rename to LICENSE From 50b80581e6de79fc6cff4981d64c032f9c19e6ef Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 11:27:58 -0500 Subject: [PATCH 27/36] Adds environment.yml with pandas, numpy, biopython --- environment.yml | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index d958eec..ace47e1 100644 --- a/environment.yml +++ b/environment.yml @@ -6,11 +6,30 @@ channels: - bioconda - defaults dependencies: - - conda-forge::python=3.7.3 - - conda-forge::markdown=3.1.1 - - conda-forge::pymdown-extensions=6.0 - - conda-forge::pygments=2.5.2 - - bioconda::fastqc=0.11.8 - - bioconda::multiqc=1.7 + # General utils + - python=3.7.3 + - markdown=3.1.1 + - pymdown-extensions=6.0 + - pygments=2.5.2 + - multiqc=1.7 + - biopython + # Module 1: SMARTLink - CCS - pbccs - - + # Module 2: Iso-Seq 3 + - isoseq3 + - lima + - pbmm2 + - pbcoretools + - bamtools + # Module 3: SQANTI3, seperate docker image + # Module 4: CPAT + # Module 5: 6 Frame Translation + - biopython + # Module 6: Transcriptome Summary + - numpy + - pandas + # Module 7: ORF Calling + # Module 8: Refined Db Generation + # Module 9: Db Annotation + # Module 10: MetaMorpheus + From 7672e1601906f64e88968a1b58778a6c88e15a20 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 11:28:30 -0500 Subject: [PATCH 28/36] Adds CCS process --- main.nf | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ nextflow.config | 4 +++ 2 files changed, 86 insertions(+) diff --git a/main.nf b/main.nf index 30fac88..b8879dc 100644 --- a/main.nf +++ b/main.nf @@ -74,6 +74,41 @@ summary['Config Profile'] = workflow.profile log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" +/* + * STEP - validate template + */ + +ch_raw_pacbio_bams_folder = params.raw_pacbio_bams_folder ? Channel.fromPath("${params.raw_pacbio_bams_folder}/*.{bam,bam.bai}") : null +ch_design_reads_csv = params.input ? Channel.fromPath("${params.input}") : null + +// Fail early: Do not allow the user to provide input file and input folder simultaneously +if (params.input && params.raw_pacbio_bams_folder ) { + exit 1, "You cannot provide an input file and input folder simultaneously. 
File: ${params.input}\n, Folder: ${params.raw_pacbio_bams_folder},\nSee --help for more information" +} + +// Fail early: Do not allow the user to provide neither input file nor input folder +if (!params.input && !params.raw_pacbio_bams_folder ) { + exit 1, "Malformed row in TSV file: ${row}, see --help for more information" +} + +// If the user has provided input folder +if (!params.input && params.raw_pacbio_bams_folder ) { + ch_raw_pacbio_bams_folder + .map { it -> [ it ] } + .set { ch_raw_pacbio_subreads_bams } +} + +// // If the user has provided input design file +// if (params.input && !params.raw_pacbio_bams_folder ) { +// ch_design_reads_csv +// .splitCsv(header:true, sep:',') +// .map { row -> [ file(row).simpleName, bam, bai ] } +// .into { ch_raw_pacbio_subreads_bams } +// } + +ch_raw_pacbio_subreads_bams.view() + + /* * STEP - validate template */ @@ -90,6 +125,52 @@ process validate { """ } +/* + * Module 1: SMARTLink - CCS + */ + +process smartlink_ccs { + tag "${sample}" + publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode + + input: + set val(sample), file(raw_pacbio_subreads_bam), file(raw_pacbio_subreads_bai) from ch_raw_pacbio_subreads_bams + + output: + set val("${sample}"), file("${sample}*bam"), file("${sample}*bai") into ch_ccs_pacbio_bams + + script: + """ + echo "when in pairs:" + echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" + echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" > "${sample}_fake_input.txt" + """ +} + +process isoseq3 { + tag "${sample}" + publishDir "${params.outdir}/isoseq3/", mode: params.publish_dir_mode + + input: + set val("${sample}"), file("${sample}*bam"), file(bai) from ch_ccs_pacbio_bams + + output: + file("*completed.bam") + + script: + """ + echo "when in pairs:" + echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" + echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" > "${sample}_fake_input.txt" + """ +} + + + + + + + def logHeader() { // Log colors ANSI codes c_black = params.monochrome_logs ? '' : "\033[0;30m"; @@ -128,3 +209,4 @@ def logHeader() { -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } + diff --git a/nextflow.config b/nextflow.config index 0ffd62d..5ce1bbd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,10 @@ params { max_cpus = 16 max_time = 240.h + // Module 1: SMARTLink - CCS + raw_pacbio_bams_suffix = 'bam' + raw_pacbio_bams_index_suffix = 'bai' // CAUTION: be sure that you declare bam.bai if using it + } // Container slug. Stable releases should specify release tag! 
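With this commit applied, the pipeline runs in stub form (the CCS and Iso-Seq processes only echo their inputs). A usage sketch with hypothetical paths, exercising the two mutually exclusive inputs:

    # Folder mode: consumes every *.bam/*.bam.bai pair in the folder.
    nextflow run main.nf \
        --raw_pacbio_bams_folder ./testdata/subreads \
        --outdir ./results

    # Passing both inputs (or neither) trips the fail-early checks above.
    nextflow run main.nf \
        --input design.csv \
        --raw_pacbio_bams_folder ./testdata/subreads   # exits immediately

One caveat in the committed checks: the "neither input provided" branch exits with a leftover "Malformed row in TSV file: ${row}" message that references an undefined `row` variable, so the error text in that branch does not describe the actual problem.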
From f04dc7c9d8854ce58eb9022dd1458d4d86fe84b0 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 15:41:11 -0500 Subject: [PATCH 29/36] Adds pbbam (required for ccs --chunk subsequent routine) --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index ace47e1..13ecbce 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - biopython # Module 1: SMARTLink - CCS - pbccs + - pbbam # Module 2: Iso-Seq 3 - isoseq3 - lima From df6bd40a90ed5b2508c946d189d7302273870aaa Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 18:15:25 -0500 Subject: [PATCH 30/36] Adds pbindex, ccs processes (w/ parallel --chunks) --- main.nf | 103 +++++++++++++++++++++++++++++++----------------- nextflow.config | 12 ++++-- 2 files changed, 76 insertions(+), 39 deletions(-) diff --git a/main.nf b/main.nf index b8879dc..bd665b2 100644 --- a/main.nf +++ b/main.nf @@ -78,36 +78,32 @@ log.info "-\033[2m--------------------------------------------------\033[0m-" * STEP - validate template */ -ch_raw_pacbio_bams_folder = params.raw_pacbio_bams_folder ? Channel.fromPath("${params.raw_pacbio_bams_folder}/*.{bam,bam.bai}") : null -ch_design_reads_csv = params.input ? Channel.fromPath("${params.input}") : null +// Fail early: Nothing to analyse if the user does not provide an input pb_bams_folder +if (!params.pb_bams_folder ) { + exit 1, "Please provide an input folder with --pb_bams_folder to proceed, see --help for more information" +} -// Fail early: Do not allow the user to provide input file and input folder simultaneously -if (params.input && params.raw_pacbio_bams_folder ) { - exit 1, "You cannot provide an input file and input folder simultaneously. File: ${params.input}\n, Folder: ${params.raw_pacbio_bams_folder},\nSee --help for more information" +if (params.pb_bams_folder && hasExtension(params.pb_bams_folder, "tar.gz")) { + ch_pb_bams_folder_tar_gz = Channel.fromPath(params.pb_bams_folder) } -// Fail early: Do not allow the user to provide neither input file nor input folder -if (!params.input && !params.raw_pacbio_bams_folder ) { - exit 1, "Malformed row in TSV file: ${row}, see --help for more information" +if (params.pb_bams_folder && !hasExtension(params.pb_bams_folder, "tar.gz")) { +ch_pb_bams_folder = params.pb_bams_folder ? 
Channel.fromFilePairs("${params.pb_bams_folder}/*.{bam,${params.bai_suffix}}", flat: true) : null } // If the user has provided input folder -if (!params.input && params.raw_pacbio_bams_folder ) { - ch_raw_pacbio_bams_folder - .map { it -> [ it ] } - .set { ch_raw_pacbio_subreads_bams } +if (params.pb_bams_folder ) { + ch_pb_bams_folder + .set { ch_pb_subreads_bams } } -// // If the user has provided input design file -// if (params.input && !params.raw_pacbio_bams_folder ) { -// ch_design_reads_csv -// .splitCsv(header:true, sep:',') -// .map { row -> [ file(row).simpleName, bam, bai ] } -// .into { ch_raw_pacbio_subreads_bams } -// } +(ch_pb_subreads_bams_for_pbi, +ch_pb_subreads_bams_to_display) = ch_pb_subreads_bams.into(2) -ch_raw_pacbio_subreads_bams.view() +ch_pb_subreads_bams_to_display.view() +ch_ccs_chunks = Channel.from(1.."${params.number_of_ccs_chunks}".toInteger()) +(ch_ccs_chunks, ch_ccs_chunks_to_display) = ch_ccs_chunks.into(2) /* * STEP - validate template @@ -129,42 +125,70 @@ process validate { * Module 1: SMARTLink - CCS */ -process smartlink_ccs { - tag "${sample}" - publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode +// Generate pbi index required for using the ccs --chunk parallelisation +process generate_pbi { + tag "${pb_subreads_bam.simpleName}" + cpus 1 + echo true input: - set val(sample), file(raw_pacbio_subreads_bam), file(raw_pacbio_subreads_bai) from ch_raw_pacbio_subreads_bams + set val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_pb_subreads_bams_for_pbi output: - set val("${sample}"), file("${sample}*bam"), file("${sample}*bai") into ch_ccs_pacbio_bams + set val("${pb_subreads_bam.simpleName}"), + file("${pb_subreads_bam.baseName}.bam"), + file("${pb_subreads_bam.baseName}.bam.bai"), + file("${pb_subreads_bam.baseName}.bam.pbi") into ch_pb_subreads_bams_for_ccs script: """ - echo "when in pairs:" - echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" - echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" > "${sample}_fake_input.txt" + pbindex ${pb_subreads_bam} """ } -process isoseq3 { - tag "${sample}" - publishDir "${params.outdir}/isoseq3/", mode: params.publish_dir_mode +ch_ccs_chucked_bams = ch_ccs_chunks.combine(ch_pb_subreads_bams_for_ccs) + +process smartlink_ccs { + tag "sample:${sample},chunk:${ith_chunk}" + publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode + cpus 1 input: - set val("${sample}"), file("${sample}*bam"), file(bai) from ch_ccs_pacbio_bams + set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai), file(pb_subreads_bai) from ch_ccs_chucked_bams output: - file("*completed.bam") + set val("${sample}"), + file("${sample}.ccs.${ith_chunk}.bam"), + file("${sample}.ccs.${ith_chunk}.bam.bai"), + file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams script: + // Hardcoded example from docs: + // ccs movie.subreads.bam movie.ccs.1.bam --chunk 1/10 -j """ - echo "when in pairs:" - echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" - echo "simpleName:${sample}\nbam:${raw_pacbio_subreads_bam}\nbai:${raw_pacbio_subreads_bai}" > "${sample}_fake_input.txt" + # ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus} + touch ${sample}.ccs.${ith_chunk}.bam ${sample}.ccs.${ith_chunk}.bam.bai ${sample}.ccs.${ith_chunk}.bam.pbi """ } +// process isoseq3 { +// tag 
"${sample}" +// publishDir "${params.outdir}/isoseq3/", mode: params.publish_dir_mode + +// input: +// set val("${sample}"), file("${sample}*bam"), file(bai) from ch_ccs_pacbio_bams + +// output: +// file("*completed.bam") + +// script: +// """ +// echo "when in pairs:" +// echo "simpleName:${sample}\nbam:${pb_subreads_bam}\nbai:${pb_subreads_bai}" +// echo "simpleName:${sample}\nbam:${pb_subreads_bam}\nbai:${pb_subreads_bai}" > "${sample}_fake_input.txt" +// """ +// } + @@ -210,3 +234,10 @@ def logHeader() { """.stripIndent() } +// Functions +// Credits for most of the functions to https://github.com/nf-core/sarek developers + +// Check file extension +def hasExtension(it, extension) { + it.toString().toLowerCase().endsWith(extension.toLowerCase()) +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 5ce1bbd..8f8f51f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,14 +21,20 @@ params { max_time = 240.h // Module 1: SMARTLink - CCS - raw_pacbio_bams_suffix = 'bam' - raw_pacbio_bams_index_suffix = 'bai' // CAUTION: be sure that you declare bam.bai if using it + bai_suffix = 'bam.bai' // CAUTION: be sure that you declare bam.bai or .bai explicitly + pb_bams_folder = 'testdata' + number_of_ccs_chunks = 10 } // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'sheynkmanlab/proteogenomics-base:dev' + +docker.enabled = true + +process { + container = 'cgpu/proteogenomics:1.0dev' + } profiles { docker { From f9b61534567cc7f88614236aad88df3bf19bb8ea Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 19:04:08 -0500 Subject: [PATCH 31/36] Removes redundant bai (pbi is needed) --- main.nf | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index bd665b2..18cbeb6 100644 --- a/main.nf +++ b/main.nf @@ -88,7 +88,8 @@ if (params.pb_bams_folder && hasExtension(params.pb_bams_folder, "tar.gz")) { } if (params.pb_bams_folder && !hasExtension(params.pb_bams_folder, "tar.gz")) { -ch_pb_bams_folder = params.pb_bams_folder ? Channel.fromFilePairs("${params.pb_bams_folder}/*.{bam,${params.bai_suffix}}", flat: true) : null +// ch_pb_bams_folder = params.pb_bams_folder ? Channel.fromFilePairs("${params.pb_bams_folder}/*.{bam,${params.bai_suffix}}", flat: true) : null +ch_pb_bams_folder = params.pb_bams_folder ? 
Channel.fromPath("${params.pb_bams_folder}/*.bam") : null } // If the user has provided input folder @@ -132,12 +133,11 @@ process generate_pbi { echo true input: - set val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_pb_subreads_bams_for_pbi + file(pb_subreads_bam) from ch_pb_subreads_bams_for_pbi output: set val("${pb_subreads_bam.simpleName}"), file("${pb_subreads_bam.baseName}.bam"), - file("${pb_subreads_bam.baseName}.bam.bai"), file("${pb_subreads_bam.baseName}.bam.pbi") into ch_pb_subreads_bams_for_ccs script: @@ -154,12 +154,11 @@ process smartlink_ccs { cpus 1 input: - set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai), file(pb_subreads_bai) from ch_ccs_chucked_bams + set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams output: set val("${sample}"), file("${sample}.ccs.${ith_chunk}.bam"), - file("${sample}.ccs.${ith_chunk}.bam.bai"), file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams script: From 76ab7a8bb248c0de5df35c56c6a3eb4d440d8c78 Mon Sep 17 00:00:00 2001 From: cgpu Date: Sun, 8 Nov 2020 20:27:36 -0500 Subject: [PATCH 32/36] Adds temp process mock ccs and flag for testing --- main.nf | 63 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/main.nf b/main.nf index 18cbeb6..3178c31 100644 --- a/main.nf +++ b/main.nf @@ -148,26 +148,51 @@ process generate_pbi { ch_ccs_chucked_bams = ch_ccs_chunks.combine(ch_pb_subreads_bams_for_ccs) -process smartlink_ccs { - tag "sample:${sample},chunk:${ith_chunk}" - publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode - cpus 1 - - input: - set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams - - output: - set val("${sample}"), - file("${sample}.ccs.${ith_chunk}.bam"), - file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams +if (!params.mock_ccs) { + process smartlink_ccs { + tag "sample:${sample},chunk:${ith_chunk}" + publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode + cpus 1 + + input: + set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams + + output: + set val("${sample}"), + file("${sample}.ccs.${ith_chunk}.bam"), + file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams + + script: + // Hardcoded example from docs: + // ccs movie.subreads.bam movie.ccs.1.bam --chunk 1/10 -j + """ + ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus} + """ + } +} - script: - // Hardcoded example from docs: - // ccs movie.subreads.bam movie.ccs.1.bam --chunk 1/10 -j - """ - # ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus} - touch ${sample}.ccs.${ith_chunk}.bam ${sample}.ccs.${ith_chunk}.bam.bai ${sample}.ccs.${ith_chunk}.bam.pbi - """ +if (params.mock_ccs) { + process smartlink_ccs_mock { + tag "sample:${sample},chunk:${ith_chunk}" + publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode + cpus 1 + + input: + set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams + + output: + set val("${sample}"), + file("${sample}.ccs.${ith_chunk}.bam"), + file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams + + script: + // Hardcoded example from docs: + // ccs movie.subreads.bam movie.ccs.1.bam 
--chunk 1/10 -j + """ + # ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus} + touch ${sample}.ccs.${ith_chunk}.bam ${sample}.ccs.${ith_chunk}.bam.bai ${sample}.ccs.${ith_chunk}.bam.pbi + """ + } } // process isoseq3 { From 512069568119cf99ea5700ef3cceaf26911fae0f Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Tue, 24 Nov 2020 22:35:55 +0200 Subject: [PATCH 33/36] Corrects typo caught by reviewdog gh-action --- environment.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 13ecbce..d50c509 100644 --- a/environment.yml +++ b/environment.yml @@ -22,7 +22,7 @@ dependencies: - pbmm2 - pbcoretools - bamtools - # Module 3: SQANTI3, seperate docker image + # Module 3: SQANTI3, separate docker image # Module 4: CPAT # Module 5: 6 Frame Translation - biopython @@ -33,4 +33,3 @@ dependencies: # Module 8: Refined Db Generation # Module 9: Db Annotation # Module 10: MetaMorpheus - From 34e3db53592bfd25536519d85c479d867a83df48 Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Tue, 24 Nov 2020 22:36:37 +0200 Subject: [PATCH 34/36] Typo fix --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index e24a933..e6018a9 100644 --- a/main.nf +++ b/main.nf @@ -78,7 +78,7 @@ log.info "-\033[2m--------------------------------------------------\033[0m-" * STEP - validate template */ -// Fail early: Nothing to analyse if the user does not provide an input pb_bams_folder +// Fail early: Nothing to analyze if the user does not provide an input pb_bams_folder if (!params.pb_bams_folder ) { exit 1, "Please provide an input folder with --pb_bams_folder to proceed, see --help for more information" } From c5c8317f63c777d236a83e1a5690c7bb6cb8ee29 Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Tue, 24 Nov 2020 22:38:52 +0200 Subject: [PATCH 35/36] Deletes commented out section To respect the rule, "we do not choose to modify cod ebehaviour by commenting in and out code chunks", --- main.nf | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/main.nf b/main.nf index e6018a9..3a0bcca 100644 --- a/main.nf +++ b/main.nf @@ -195,23 +195,6 @@ if (params.mock_ccs) { } } -// process isoseq3 { -// tag "${sample}" -// publishDir "${params.outdir}/isoseq3/", mode: params.publish_dir_mode - -// input: -// set val("${sample}"), file("${sample}*bam"), file(bai) from ch_ccs_pacbio_bams - -// output: -// file("*completed.bam") - -// script: -// """ -// echo "when in pairs:" -// echo "simpleName:${sample}\nbam:${pb_subreads_bam}\nbai:${pb_subreads_bai}" -// echo "simpleName:${sample}\nbam:${pb_subreads_bam}\nbai:${pb_subreads_bai}" > "${sample}_fake_input.txt" -// """ -// } From 38a5079a852bf6f0d3dc3db400ea31d585b04438 Mon Sep 17 00:00:00 2001 From: cgpu <38183826+cgpu@users.noreply.github.com> Date: Tue, 24 Nov 2020 22:40:43 +0200 Subject: [PATCH 36/36] Makes the section note more informative --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 3a0bcca..3475f0e 100644 --- a/main.nf +++ b/main.nf @@ -75,7 +75,7 @@ log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" /* - * STEP - validate template + * Configuring channels based on input parameters */ // Fail early: Nothing to analyze if the user does not 
provide an input pb_bams_folder
${v ?: 'N/A'}