diff --git a/IEEE_Docs/Draft P2791 Timeline - updated 7 Dec 2018.xls b/IEEE_Docs/Draft P2791 Timeline - updated 7 Dec 2018.xls deleted file mode 100644 index 2635bd9..0000000 Binary files a/IEEE_Docs/Draft P2791 Timeline - updated 7 Dec 2018.xls and /dev/null differ diff --git a/IEEE_Docs/EMB MWG P&P_P2791.docx b/IEEE_Docs/EMB MWG P&P_P2791.docx deleted file mode 100644 index c223b49..0000000 Binary files a/IEEE_Docs/EMB MWG P&P_P2791.docx and /dev/null differ diff --git a/IEEE_Docs/IEEE Entity CLA_BSD-3 with Appendix A_081717.pdf b/IEEE_Docs/IEEE Entity CLA_BSD-3 with Appendix A_081717.pdf deleted file mode 100644 index 29e7be4..0000000 Binary files a/IEEE_Docs/IEEE Entity CLA_BSD-3 with Appendix A_081717.pdf and /dev/null differ diff --git a/IEEE_Docs/IEEE Entity CLA_BSD-3_081717.pdf b/IEEE_Docs/IEEE Entity CLA_BSD-3_081717.pdf deleted file mode 100644 index fa3bfda..0000000 Binary files a/IEEE_Docs/IEEE Entity CLA_BSD-3_081717.pdf and /dev/null differ diff --git a/IEEE_Docs/IEEE Individual CLA_BSD-3 with Appendix A_081717.pdf b/IEEE_Docs/IEEE Individual CLA_BSD-3 with Appendix A_081717.pdf deleted file mode 100644 index e2d45a2..0000000 Binary files a/IEEE_Docs/IEEE Individual CLA_BSD-3 with Appendix A_081717.pdf and /dev/null differ diff --git a/IEEE_Docs/IEEE Individual CLA_BSD-3_081717.pdf b/IEEE_Docs/IEEE Individual CLA_BSD-3_081717.pdf deleted file mode 100644 index 60964f1..0000000 Binary files a/IEEE_Docs/IEEE Individual CLA_BSD-3_081717.pdf and /dev/null differ diff --git a/IEEE_Docs/LC_Sponsor_Ballot_Overview_1July2016.ppt b/IEEE_Docs/LC_Sponsor_Ballot_Overview_1July2016.ppt deleted file mode 100644 index 1208885..0000000 Binary files a/IEEE_Docs/LC_Sponsor_Ballot_Overview_1July2016.ppt and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_Agenda.doc b/IEEE_Docs/Meetings/2018August29/08292018_Agenda.doc deleted file mode 100644 index 9d034fd..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_Agenda.doc and /dev/null 
differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_EMB MWG P&P_P2791_JGK_MOD.docx b/IEEE_Docs/Meetings/2018August29/08292018_EMB MWG P&P_P2791_JGK_MOD.docx deleted file mode 100644 index 2288e83..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_EMB MWG P&P_P2791_JGK_MOD.docx and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_Minutes.doc b/IEEE_Docs/Meetings/2018August29/08292018_Minutes.doc deleted file mode 100644 index 66490e6..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_Minutes.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_P2791_PAR_Detail.pdf b/IEEE_Docs/Meetings/2018August29/08292018_P2791_PAR_Detail.pdf deleted file mode 100644 index e742840..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_P2791_PAR_Detail.pdf and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_Roberts-Rules-of-Order.pdf b/IEEE_Docs/Meetings/2018August29/08292018_Roberts-Rules-of-Order.pdf deleted file mode 100644 index 3079b64..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_Roberts-Rules-of-Order.pdf and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_VotingRoster_Original.docx b/IEEE_Docs/Meetings/2018August29/08292018_VotingRoster_Original.docx deleted file mode 100644 index 777e28d..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_VotingRoster_Original.docx and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018August29/08292018_Welcome to the IEEE-SA_Final.pdf b/IEEE_Docs/Meetings/2018August29/08292018_Welcome to the IEEE-SA_Final.pdf deleted file mode 100644 index 98fe364..0000000 Binary files a/IEEE_Docs/Meetings/2018August29/08292018_Welcome to the IEEE-SA_Final.pdf and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018December03/12032018_Agenda.doc b/IEEE_Docs/Meetings/2018December03/12032018_Agenda.doc deleted file mode 100644 index 9a8c285..0000000 Binary files 
a/IEEE_Docs/Meetings/2018December03/12032018_Agenda.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018December03/12032018_Minutes.doc b/IEEE_Docs/Meetings/2018December03/12032018_Minutes.doc deleted file mode 100644 index 50915c2..0000000 Binary files a/IEEE_Docs/Meetings/2018December03/12032018_Minutes.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018October22/10222018_Agenda.doc b/IEEE_Docs/Meetings/2018October22/10222018_Agenda.doc deleted file mode 100644 index b4ff3e4..0000000 Binary files a/IEEE_Docs/Meetings/2018October22/10222018_Agenda.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018October22/10222018_Minutes.doc b/IEEE_Docs/Meetings/2018October22/10222018_Minutes.doc deleted file mode 100644 index df9f5d6..0000000 Binary files a/IEEE_Docs/Meetings/2018October22/10222018_Minutes.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2018October22/10222018_P2791_Draft_Standard.doc b/IEEE_Docs/Meetings/2018October22/10222018_P2791_Draft_Standard.doc deleted file mode 100644 index bff2df0..0000000 Binary files a/IEEE_Docs/Meetings/2018October22/10222018_P2791_Draft_Standard.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2019May09/05092019_Agenda.doc b/IEEE_Docs/Meetings/2019May09/05092019_Agenda.doc deleted file mode 100644 index 219736d..0000000 Binary files a/IEEE_Docs/Meetings/2019May09/05092019_Agenda.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2019May09/05092019_Minutes.doc b/IEEE_Docs/Meetings/2019May09/05092019_Minutes.doc deleted file mode 100644 index d8d6410..0000000 Binary files a/IEEE_Docs/Meetings/2019May09/05092019_Minutes.doc and /dev/null differ diff --git a/IEEE_Docs/Meetings/2019May09/Comment Resolution_RevCom.pptx b/IEEE_Docs/Meetings/2019May09/Comment Resolution_RevCom.pptx deleted file mode 100644 index 4983e2a..0000000 Binary files a/IEEE_Docs/Meetings/2019May09/Comment Resolution_RevCom.pptx and /dev/null differ diff --git 
a/IEEE_Docs/Meetings/2019May09/Individual_Roster_Public.xlsx b/IEEE_Docs/Meetings/2019May09/Individual_Roster_Public.xlsx deleted file mode 100644 index f7b6707..0000000 Binary files a/IEEE_Docs/Meetings/2019May09/Individual_Roster_Public.xlsx and /dev/null differ diff --git a/IEEE_Docs/Meetings/Individual_Roster_Public.xlsx b/IEEE_Docs/Meetings/Individual_Roster_Public.xlsx deleted file mode 100644 index 517c4b6..0000000 Binary files a/IEEE_Docs/Meetings/Individual_Roster_Public.xlsx and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018.doc b/IEEE_Docs/P2971_D3_Dec2018.doc deleted file mode 100644 index e96f3a1..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018.doc and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018_JGK.doc b/IEEE_Docs/P2971_D3_Dec2018_JGK.doc deleted file mode 100644 index 26b26b7..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018_JGK.doc and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised.doc b/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised.doc deleted file mode 100644 index 687cfea..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised.doc and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges.doc b/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges.doc deleted file mode 100644 index d4641ec..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges.doc and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C.doc b/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C.doc deleted file mode 100644 index 0a6e079..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C.doc and /dev/null differ diff --git a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C_OpenSourceNotice.doc b/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C_OpenSourceNotice.doc deleted file mode 100644 index cb4b0b6..0000000 Binary files a/IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C_OpenSourceNotice.doc and /dev/null differ 
diff --git a/IEEE_Docs/README.md b/IEEE_Docs/README.md deleted file mode 100644 index 2fd5f50..0000000 --- a/IEEE_Docs/README.md +++ /dev/null @@ -1,11 +0,0 @@ -IEEE Documents -============== -This repository contains the documents that are related to the [certification of the BCO specification](http://sites.ieee.org/sagroups-2791/) as an [IEEE](http://ieee.org) standard. - -Note that unless you are viewing a [release](https://github.com/biocompute-objects/BCO_Specification/releases) this is a draft subject to change. - -Table of contents: -* [Current DRAFT of P2791](https://github.com/biocompute-objects/BCO_Specification/blob/master/IEEE_Docs/standard.md) (BioCompute IEEE standard) [Click here for official Word file](https://github.com/biocompute-objects/BCO_Specification/blob/master/IEEE_Docs/P2971_D3_Dec2018_JGK.doc) -* [PAR submission form](./Meetings/2018August29/08292018_P2791_PAR_Detail.pdf) -* [Roberts Rules Of Order](./Meetings/2018August29/08292018_Roberts-Rules-of-Order.pdf) -* [IEEE Standards Association Overview](./Meetings/2018August29/08292018_Welcome%20to%20the%20IEEE-SA_Final.pdf) diff --git a/IEEE_Docs/standard.md b/IEEE_Docs/standard.md deleted file mode 100644 index 5b25538..0000000 --- a/IEEE_Docs/standard.md +++ /dev/null @@ -1,337 +0,0 @@ -# P2791™ Draft Standard for Bioinformatics Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication - -Sponsor - - **Standards Committee** - - of the - - **IEEE Engineering in Medicine and Biology Society** - - -Approved `` - -**IEEE-SA Standards Board** - - Copyright © 2018 by The Institute of Electrical and Electronics Engineers, Inc. - - Three Park Avenue - - New York, New York 10016-5997, USA - - - All rights reserved. - - -This document is an unapproved draft of a proposed IEEE Standard. As such, this document is subject to change. USE AT YOUR OWN RISK! IEEE copyright statements SHALL NOT BE REMOVED from draft or approved IEEE standards, or modified in any way. 
Because this is an unapproved draft, this document must not be utilized for any conformance/compliance purposes. Permission is hereby granted for officers from each IEEE Standards Working Group or Committee to reproduce the draft document developed by that Working Group for purposes of international standardization consideration. IEEE Standards Department must be informed of the submission for consideration prior to any reproduction for international standardization consideration (stds.ipr@ieee.org). Prior to adoption of this document, in whole or in part, by another standards development organization, permission must first be obtained from the IEEE Standards Department (stds.ipr@ieee.org). When requesting permission, IEEE Standards Department will require a copy of the standard development organization's document highlighting the use of IEEE content. Other entities seeking permission to reproduce this document, in whole or in part, must also obtain permission from the IEEE Standards Department. - - -IEEE Standards Department - -445 Hoes Lane - -Piscataway, NJ 08854, USA - - -### Abstract - -This standard establishes accurate and secure communication of bioinformatics protocols and data in order to facilitate bioinformatics workflow related exchange and communication between regulatory agencies, pharmaceutical companies, bioinformatics platform providers and researchers. Accurate communication helps ensure responsibility, reproducibility, verify bioinformatics protocol, track provenance information and promote interoperability. In addition, this standard also defines the assurance program for evaluating and certifying products against those requirements. 
- -### Keywords - -genomics, next generation sequencing, high throughput sequencing, massively parallel sequencing, NGS, HTS, MPS, workflow, pipeline, bioinformatics, analysis, regulatory - - -### Important Notices and Disclaimers Concerning IEEE Standards Documents - -IEEE documents are made available for use subject to important notices and legal disclaimers. These notices and disclaimers, or a reference to this page, appear in all standards and may be found under the heading “Important Notices and Disclaimers Concerning IEEE Standards Documents.” They can also be obtained on request from IEEE or viewed at http://standards.ieee.org/ipr/disclaimers.html. - -### Notice and Disclaimer of Liability Concerning the Use of IEEE Standards Documents - -IEEE Standards documents (standards, recommended practices, and guides), both full-use and trial-use, are developed within IEEE Societies and the Standards Coordinating Committees of the IEEE Standards Association (“IEEE-SA”) Standards Board. IEEE (“the Institute”) develops its standards through a consensus development process, approved by the American National Standards Institute (“ANSI”), which brings together volunteers representing varied viewpoints and interests to achieve the final product. IEEE Standards are documents developed through scientific, academic, and industry-based technical working groups. Volunteers in IEEE working groups are not necessarily members of the Institute and participate without compensation from IEEE. While IEEE administers the process and establishes rules to promote fairness in the consensus development process, IEEE does not independently evaluate, test, or verify the accuracy of any of the information or the soundness of any judgments contained in its standards. -IEEE Standards do not guarantee or ensure safety, security, health, or environmental protection, or ensure against interference with or from other devices or networks. 
Implementers and users of IEEE Standards documents are responsible for determining and complying with all appropriate safety, security, environmental, health, and interference protection practices and all applicable laws and regulations. -IEEE does not warrant or represent the accuracy or content of the material contained in its standards, and expressly disclaims all warranties (express, implied and statutory) not included in this or any other document relating to the standard, including, but not limited to, the warranties of: merchantability; fitness for a particular purpose; non-infringement; and quality, accuracy, effectiveness, currency, or completeness of material. In addition, IEEE disclaims any and all conditions relating to: results; and workmanlike effort. IEEE standards documents are supplied “AS IS” and “WITH ALL FAULTS.” -Use of an IEEE standard is wholly voluntary. The existence of an IEEE standard does not imply that there are no other ways to produce, test, measure, purchase, market, or provide other goods and services related to the scope of the IEEE standard. Furthermore, the viewpoint expressed at the time a standard is approved and issued is subject to change brought about through developments in the state of the art and comments received from users of the standard. -In publishing and making its standards available, IEEE is not suggesting or rendering professional or other services for, or on behalf of, any person or entity nor is IEEE undertaking to perform any duty owed by any other person or entity to another. Any person utilizing any IEEE Standards document, should rely upon his or her own independent judgment in the exercise of reasonable care in any given circumstances or, as appropriate, seek the advice of a competent professional in determining the appropriateness of a given IEEE standard. 
-IN NO EVENT SHALL IEEE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE PUBLICATION, USE OF, OR RELIANCE UPON ANY STANDARD, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE AND REGARDLESS OF WHETHER SUCH DAMAGE WAS FORESEEABLE. - -### Translations - -The IEEE consensus development process involves the review of documents in English only. In the event that an IEEE standard is translated, only the English version published by IEEE should be considered the approved IEEE standard. - -### Official statements - -A statement, written or oral, that is not processed in accordance with the IEEE-SA Standards Board Operations Manual shall not be considered or inferred to be the official position of IEEE or any of its committees and shall not be considered to be, or be relied upon as, a formal position of IEEE. At lectures, symposia, seminars, or educational courses, an individual presenting information on IEEE standards shall make it clear that his or her views should be considered the personal views of that individual rather than the formal position of IEEE. - -### Comments on standards - -Comments for revision of IEEE Standards documents are welcome from any interested party, regardless of membership affiliation with IEEE. However, IEEE does not provide consulting information or advice pertaining to IEEE Standards documents. Suggestions for changes in documents should be in the form of a proposed change of text, together with appropriate supporting comments. Since IEEE standards represent a consensus of concerned interests, it is important that any responses to comments and questions also receive the concurrence of a balance of interests. 
For this reason, IEEE and the members of its societies and Standards Coordinating Committees are not able to provide an instant response to comments or questions except in those cases where the matter has previously been addressed. For the same reason, IEEE does not respond to interpretation requests. Any person who would like to participate in revisions to an IEEE standard is welcome to join the relevant IEEE working group. - -Comments on standards should be submitted to the following address: - - Secretary, IEEE-SA Standards Board - - 445 Hoes Lane - - Piscataway, NJ 08854 USA - - -### Laws and regulations - -Users of IEEE Standards documents should consult all applicable laws and regulations. Compliance with the provisions of any IEEE Standards document does not imply compliance to any applicable regulatory requirements. Implementers of the standard are responsible for observing or referring to the applicable regulatory requirements. IEEE does not, by the publication of its standards, intend to urge action that is not in compliance with applicable laws, and these documents may not be construed as doing so. - -### Copyrights - -IEEE draft and approved standards are copyrighted by IEEE under U.S. and international copyright laws. They are made available by IEEE and are adopted for a wide variety of both public and private uses. These include both use, by reference, in laws and regulations, and use in private self-regulation, standardization, and the promotion of engineering practices and methods. By making these documents available for use and adoption by public authorities and private users, IEEE does not waive any rights in copyright to the documents. - - -### Photocopies - -Subject to payment of the appropriate fee, IEEE will grant users a limited, non-exclusive license to photocopy portions of any individual standard for company or organizational internal use or individual, non-commercial use only. 
To arrange for payment of licensing fees, please contact Copyright Clearance Center, Customer Service, 222 Rosewood Drive, Danvers, MA 01923 USA; +1 978 750 8400. Permission to photocopy portions of any individual standard for educational classroom use can also be obtained through the Copyright Clearance Center. - -### Updating of IEEE Standards documents - -Users of IEEE Standards documents should be aware that these documents may be superseded at any time by the issuance of new editions or may be amended from time to time through the issuance of amendments, corrigenda, or errata. A current IEEE document at any point in time consists of the current edition of the document together with any amendments, corrigenda, or errata then in effect. -Every IEEE standard is subjected to review at least every ten years. When a document is more than ten years old and has not undergone a revision process, it is reasonable to conclude that its contents, although still of some value, do not wholly reflect the present state of the art. Users are cautioned to check to determine that they have the latest edition of any IEEE standard. -In order to determine whether a given document is the current edition and whether it has been amended through the issuance of amendments, corrigenda, or errata, visit IEEE Xplore at http://ieeexplore.ieee.org/ or contact IEEE at the address listed previously. For more information about the IEEE-SA or IEEE’s standards development process, visit the IEEE-SA Website at http://standards.ieee.org. - -### Errata - -Errata, if any, for all IEEE standards can be accessed on the IEEE-SA Website at the following URL: http://standards.ieee.org/findstds/errata/index.html. Users are encouraged to check this URL for errata periodically. - -### Patents - -Attention is called to the possibility that implementation of this standard may require use of subject matter covered by patent rights. 
By publication of this standard, no position is taken by the IEEE with respect to the existence or validity of any patent rights in connection therewith. If a patent holder or patent applicant has filed a statement of assurance via an Accepted Letter of Assurance, then the statement is listed on the IEEE-SA Website at http://standards.ieee.org/about/sasb/patcom/patents.html. Letters of Assurance may indicate whether the Submitter is willing or unwilling to grant licenses under patent rights without compensation or under reasonable rates, with reasonable terms and conditions that are demonstrably free of any unfair discrimination to applicants desiring to obtain such licenses. -Essential Patent Claims may exist for which a Letter of Assurance has not been received. The IEEE is not responsible for identifying Essential Patent Claims for which a license may be required, for conducting inquiries into the legal validity or scope of Patents Claims, or determining whether any licensing terms or conditions provided in connection with submission of a Letter of Assurance, if any, or in any licensing agreements are reasonable or non-discriminatory. Users of this standard are expressly advised that determination of the validity of any patent rights, and the risk of infringement of such rights, is entirely their own responsibility. Further information may be obtained from the IEEE Standards Association. 
- -### Participants - -At the time this draft standard was completed, the P2791 Working Group had the following membership: - -**Raja Mazumder**, Chair - -**Vahan Simonyan**, Vice Chair - -Ogan Abaan, -Jonas Almeida, -Gil Alterovitz, -Payal Banerjee, -Amanda Bell, -Surajit Bhattacharya, -Lee Black, -Ben Busby, -Kristy Cloyd-Warwick, -Ryan Connor, -Michael Crusoe, -Dennis Dean, -Paul Duncan, -Josep Gelpi, -Carole Goble, -Jeremy Goecks, -Jonathan Jacobs, -Robel Kahsay, -Jonathon Keeney, -Charles Hadley King, -Jonathan LoTempio, -Xeandong Meng, -David Michaels, -Hiroki Morizono, -Rahi Navelkar, -Asa Oudes, -Janisha Patel, -John Penn, -Megan Pottersbusch, -Jonathan Pryke, -Stian Soiland-Reyes, -Dan Taylor, -Jason Travis, -Paul Walsh, -Jianchao Yao - - -The following members of the individual/entity balloting committee voted on this standard. Balloters may have voted for approval, disapproval, or abstention. - -**[To be supplied by IEEE]** - - Balloter1 - - Balloter2 - - Balloter3 - - Balloter4 - - Balloter5 - - Balloter6 - - Balloter7 - - Balloter8 - - Balloter9 - - -When the IEEE-SA Standards Board approved this standard on ``, it had the following membership: -**[To be supplied by IEEE]** - - , Chair - , Vice Chair - , Past Chair - Konstantinos Karachalios, Secretary - - SBMember1 - SBMember2 - SBMember3 - SBMember4 - SBMember5 - SBMember6 - SBMember7 - SBMember8 - SBMember9 - - *Member Emeritus - - -### Introduction - -> This introduction is not part of P2791/D1, Draft Standard for Bioinformatics Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication. - -BioCompute standardizes bioinformatics workflows in the genomic analysis space. BioCompute addresses the tremendous variability and uncertainty in communicating bioinformatics workflows and data related to analysis as a result of high throughput sequencing (HTS). 
The need to resolve issues in communication was felt particularly strongly between the United States Food and Drug Administration (FDA) and the entities that submit any work to the FDA for regulatory analysis that includes an HTS component. A plan for BioCompute and initial goals of the project were drafted in a collaboration between the George Washington University and the FDA in 2014. The project has grown since then to include publications, workshops, applied use cases, and a large community of participants and collaborators. The standard is intended: -1) to be both human and machine readable, -2) to be applied to genomic analysis workflows, and -3) to be able to capture all details related to a workflow in such a way as to facilitate efficient communication and improve reproducibility and interoperability. - -Every effort is made to accommodate any tool, platform or script, and to be adaptable to future developments in this field under a unified set of descriptions to standardize and streamline the representations of such complex bioinformatics processes. - - -BioCompute is a standard and a BioCompute Object (BCO) is an instance of that standard. High throughput sequencing (HTS), also referred to as next-generation sequencing (NGS) or massively parallel sequencing (MPS), has increased the pace at which we generate, compute and share genomic data in biomedical sciences. As a result, scientists, clinicians and regulators are now faced with a new data paradigm that is less portable, more complex and most of all poorly standardized. The BCO uses a simple JSON format to encode important information on the execution of computational pipelines, or for the creation of knowledge bases. BioCompute can be process oriented (for software pipelines) and/or product oriented (for knowledge bases). Accordingly, the Error Domain can include information for quality assurance (QA) and/or quality control (QC). 
The goal of using a BCO is to streamline communication of these otherwise difficult to elucidate details between stakeholders in academia, industry and regulatory agencies. Encapsulating HTS data processing in a BCO will facilitate swift communications between the FDA and other stakeholders who seek regulatory review/approval, hence reducing the burden and time to decision. - - -The US Food and Drug Administration (FDA) and George Washington University (GW) have partnered to establish a framework for community-based standards development and harmonization of HTS computations and data formats. Standardized HTS data processing descriptions and data formats will promote interoperability and simplify the verification of the bioinformatics protocols applied against data. To do this, a schema has been developed to represent instances of computational analysis as a BCO. A BCO includes: - -- Information about parameters and versions of the executable programs in a pipeline -- Reference to input and output test data for verification of the pipeline -- A usability domain -- Keywords -- A list of agents involved along with other important metadata, such as their specific contribution - -Knowledge of input data is intended to be captured according to existing efforts, including MIRAGE, MIAPE, and STRENDA, and to be in accordance with Minimum Information Standards. In addition to all the information captured in the BCO, the BCO itself must be independent of the execution environment, whether it is a local or a cloud-based infrastructure. - - -#### **Contents** - -* [1. Overview](#1-overview) -* [1.1 General](#11-general) -* [1.2 Scope](#12-scope) -* [1.3 Purpose](#13-purpose) -* [2. Normative references](#2-normative-references) -* [3. Definitions, acronyms, and abbreviations](#3-definitions-acronyms-and-abbreviations) -* [3.1 Acronyms and abbreviations](#31-acronyms-and-abbreviations) -* [4. 
BioCompute Standard](#4-biocompute-standard) -* [4.1 General](#41-general) -* [Annex A (informative) Bibliography](#annex-a-informative) - -# Draft Standard for Bioinformatics Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication - --- - -## 1. Overview - -### 1.1 General - -The BioCompute standard captures relevant information from a high throughput sequencing workflow in order to enable a user to understand and interpret the workflow efficiently and with high confidence. BioCompute is a standard that is particularly well adapted to regulatory review. Pursuant to this, workflow steps and prerequisites to execute workflow steps are recorded in detail in the BioCompute standard. Information is recorded using key/value pairs in JavaScript Object Notation (JSON), adhering to the JSON Schema. -Key/value pairs are organized by domains: - -* The Provenance Domain - tracks metadata -* The Usability Domain - tracks what was done -* The Extension Domain - provides user-defined fields -* The Description Domain - captures a description of external resources, pipeline steps, and the relationships of I/O objects -* The Execution Domain - describes information needed for deployment, software configuration and running applications in a dependent environment -* The Parametric Domain - captures all parameters that customize a computational flow -* The Input and Output Domain - contains a list of global input and output files -* The Error Domain - describes errors, including the limits of detectability, false positives, false negatives, statistical confidence of outcomes, and description of errors (i.e. empirical or algorithmic). - -### 1.2 Scope - -This standard establishes accurate and secure communication of bioinformatics protocols in order to facilitate bioinformatics workflow related exchange and communication between regulatory agencies, pharmaceutical companies, bioinformatics platform providers and researchers. 
Accurate communication helps ensure responsibility, verify bioinformatics protocol, track provenance information and promote interoperability. - -### 1.3 Purpose - -The standard allows for the cross-platform communication of complex computation from inception to manufacturing of medical products and services, resulting in decreased costs of drug discovery and review, and accelerated delivery of treatment to patients. - -## 2. Normative references - -The following referenced documents are indispensable for the application of this document (i.e., they must be understood and used, so each referenced document is cited in text and its relationship to this document is explained). For dated references, only the edition cited applies. For undated references, the latest edition of the referenced document (including any amendments or corrigenda) applies. -[BioCompute Schema](https://www.w3id/biocompute/schemas/1.3.0/biocomputeobject.json) -[JSON Schema](https://json-schema.org/latest/json-schema-core.html) -[description_domain](https://www.w3id/biocompute/schemas/1.3.0/description_domain.json) -[execution_domain](https://www.w3id/biocompute/schemas/1.3.0/execution_domain.json) -[io_domain](https://www.w3id/biocompute/schemas/1.3.0/io_domain.json) -[parametric_domain](https://www.w3id/biocompute/schemas/1.3.0/parametric_domain.json) -[provenance_domain](https://www.w3id/biocompute/schemas/1.3.0/provenance_domain.json) -[usability_domain](https://www.w3id/biocompute/schemas/1.3.0/usability_domain.json) - -## 3. Definitions, acronyms, and abbreviations - -For the purposes of this document, the following terms and definitions apply. The IEEE Standards Dictionary Online should be consulted for terms not defined in this clause. - -### 3.1 Acronyms and abbreviations - -BCO BioCompute Object -JSON JavaScript Object Notation -FHIR Fast Healthcare Interoperability Resources -SCM Source Control Management - -## 4. 
BioCompute Standard - -### 4.1 General - -A BCO is a text file written in JSON data structure that shall consist of all domains required by the BioCompute Schema . A BCO shall be written in JSON Schema, and therefore invokes all of the requirements of the JSON Schema. The minimum requirement to execute the standard is the fully organized BCO containing all domains in proper JSON Schema format. Pursuant to JSON schema, the required fields are listed at the top of the BCO. - -The fully organized BCO file is hosted in the schemas folder, along with related files. All the files in the schemas folder are linked together (using JSON pointers as described by the JSON Schema), being referenced by the `biocomputeobject.json` file. For development purposes, these files are used to track changes, but some are not required to adhere to the standard. Those required for a complete BCO are the biocomputeobject.json, `description_domain.json`, `execution_domain.json`, `io_domain.json`, `parametric_domain.json`, `provenance_domain.json`, and `usability_domain.json`. The `error_domain.json` is an optional domain that further describes a bioinformatics workflow, and the extension_domain is an optional domain that contains user-defined fields. - -The top three lines of a BCO (`bco_spec_version`, `bco_id`, and `checksum`) are metadata that describe the BCO. These lines are external to all domains. The checksum is calculated on all following lines. - -Files in the schemas folder are kept separate for organization. References in the BCO schema ($ref) to these files should be replaced with the proper domain from the appropriate file. For example, line 141 (“$ref”: `provenance_domain.json`) is a reference to the structure specified in the `provenance_domain.json` file. The BCO Schema builds on the JSON Schema by adding domains in a way that facilitates the communication of bioinformatics workflows. A description of the domain files follows. 
In addition, two examples have been generated by the community of users, as well as a tool to automate the creation of a file using the BCO schema standard. - -The Description Domain of a BioCompute Object contains a description of external resources, pipeline steps, and the relationship of I/O objects. - -The Error Domain contains information related to the bounds of detection (such as the minimum sequence depth and minimum sequence coverage), and statistical analyses of the pipeline (such as the false negative and false positive rates). Fields in the Error Domain can be determined algorithmically (by repeatedly invoking the pipeline with the same data) or empirically (by invoking the pipeline with different data, often synthetically generated data). - -The Execution Domain of a BioCompute Object contains information needed for deployment, software configuration, and running applications in a dependent environment. This may include scripts, drivers, environment variables, and other software prerequisites. - -The IO Domain of a BioCompute Object is a list of global input and output files that may exist on local machine or on another machine. It does not include references to intermediate files. - -The Parametric Domain of a BioCompute Object includes any parameters used in a workflow. This is typically used only in the context of parameters changed from default settings for ease of understanding. - -The Provenance Domain contains metadata related to the BCO and is not used for computation ,. It is used to track the flow of data from original source to final computation, and includes contributors, reviewers, and versioning. -The Usability Domain of a BioCompute Object is a plain language description of what was done in the workflow. This is not used for computation, and should align with the actual steps described elsewhere in the BCO. The Usability Domain conveys the purpose of the BCO, and improves searchability of the BCO. 
It is recommended that a novel use of the BCO could result in the creation of a new entry with a new Usability Domain. - -The Extension Domain allows a user to define additional fields and is optional. A separate folder called extension_domain exists within the schemas folder. Two Extension Domain example files exist in the extension_domain folder that describe how a BCO can include a reference to FHIR (Extension Domain example: FHIR ) and/or to SCM (Extension Domain example: SCM ). - -Additional helpful resources have been created, including a [Community User Guide for Best Practices][B1]. This document describes ways in which the schema has been used and is known to be effective, using these to derive best practices. In addition, a [repository of examples][B3] exists, which includes the use of optional Error Domain. A [BCO Editor][B4] tool has also been generated. The BCO Editor is an example implementation of the schema, and can be used to create and edit BCO’s. Finally, a [script to validate][B5] that documents have been created according to the BCO schema is also available for use. This python tool will check a document to ensure that it has been created according to the current BCO Schema. - -## Annex A (informative) - -### Bibliography -Bibliographical references are resources that provide additional or helpful material but do not need to be understood or used to implement this standard. Reference to these resources is made for informational use only. 
- -[Community User Guide for Best Practices](https://w3id.org/biocompute/spec/1.3.0/user_guide.md) - -[B1]: https://w3id.org/biocompute/spec/1.3.0/user_guide.md - -[JSON Schema: A Media Type for Describing JSON Documents](https://json-schema.org/latest/json-schema-core.html) - -[B2]: https://json-schema.org/latest/json-schema-core.html - -[Example Repository](https://w3id.org/biocompute/examples/HCV1a.json) - -[B3]: https://w3id.org/biocompute/examples/HCV1a.json - -[BCO Editor](https://w3id.org/biocompute/bco_editor/html/js) - -[B4]: https://w3id.org/biocompute/bco_editor/html/js - -[Validation Script](https://w3id.org/biocompute/validate.py) - -[B5]: https://w3id.org/biocompute/validate.py diff --git a/README.md b/README.md index 537dc4e..75c8733 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ BioCompute ========== -This version: [draft-1.4.0](https://github.com/biocompute-objects/BCO_Specification/tree/dev) +This version: [1.4.0](https://github.com/biocompute-objects/BCO_Specification/tree/1.4.0) Previous version: [v1.3.1](https://github.com/biocompute-objects/BCO_Specification/releases/tag/1.3.1) @@ -33,7 +33,7 @@ A functional example of a BCO with associated input and output files, and includ ## User Guide -The [BioCompute Objects user guide](/docs/user_guide.md) provides an introduction to implementing/writing a BCO for a pipeline and/or a workflow, and is taken from the [BioCompute Objects Specification Document](/IEEE_Docs/standard.md). +The [BioCompute Objects user guide](/content/user_guide.md) provides an introduction to implementing/writing a BCO for a pipeline and/or a workflow, and is taken from the [BioCompute Objects Specification Document](/IEEE_Docs/standard.md). 
### Repository @@ -41,19 +41,19 @@ Note that unless you are viewing a [release](https://github.com/biocompute-objec Table of content: -* [BioCompute Object (BCO) User Guide](/docs/user_guide.md) - * [Introduction to BioCompute Objects](/docs/introduction.md) - * [BCO domains](/docs/bco-domains.md) - * [Top level fields](/docs/top-level.md) - * [Provenance domain](/docs/provenance-domain.md) - * [Usability domain](/docs/usability-domain.md) - * [FHIR extension](/docs/extension-fhir.md) - * [SCM extension](/docs/extension-scm.md) - * [Description domain](/docs/description-domain.md) - * [Execution domain](/docs/execution-domain.md) - * [Parametric domain](/docs/parametric-domain.md) - * [Input and Output domain](/docs/io-domain.md) - * [Error domain](/docs/error-domain.md) +* [BioCompute Object (BCO) User Guide](/content/user_guide.md) + * [Introduction to BioCompute Objects](/content/introduction.md) + * [BCO domains](/content/bco-domains.md) + * [Top level fields](/content/top-level.md) + * [Provenance domain](/content/provenance-domain.md) + * [Usability domain](/content/usability-domain.md) + * [FHIR extension](/content/extension-fhir.md) + * [SCM extension](/content/extension-scm.md) + * [Description domain](/content/description-domain.md) + * [Execution domain](/content/execution-domain.md) + * [Parametric domain](/content/parametric-domain.md) + * [Input and Output domain](/content/io-domain.md) + * [Error domain](/content/error-domain.md) * [BCO expanded view example HCV1a.json](HCV1a.json) ## Specification @@ -90,4 +90,4 @@ As a subscriber to the BCO mailing list, you can post to it by sending a message To subscribe or unsubscribe, please visit https://hermes.gwu.edu/cgi-bin/wa?A0=BIOCOMPUTELS and click `Subscribe` or `Unsubscribe` on the lower right. 
You can also unsubscribe from the list at any time by sending an email to listserv@hermes.gwu.edu, in which the body says: `unsubscribe biocomputels` -Please also see our [OSF page](https://osf.io/h59uh/) or our [main page](https://biocomputeobject.org/) +This repository is in support of [2791-2020](https://standards.ieee.org/standard/2791-2020.html) - IEEE Approved Draft Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication. Please also see our [OSF page](https://osf.io/h59uh/) or our [main page](https://biocomputeobject.org/) diff --git a/VERSION.py b/VERSION.py new file mode 100644 index 0000000..8aae92b --- /dev/null +++ b/VERSION.py @@ -0,0 +1,3 @@ +VERSION_MAJOR = "1.4" +VERSION_MINOR = "0" +VERSION = VERSION_MAJOR + ('.' + VERSION_MINOR if VERSION_MINOR else '') \ No newline at end of file diff --git a/config.toml b/config.toml new file mode 100644 index 0000000..8a74877 --- /dev/null +++ b/config.toml @@ -0,0 +1,71 @@ +baseURL = "/" +languageCode = "en-us" +title = "BioCompute Object Documentation" +theme = "hugo-biocompute" + +enableEmoji = true +hasCJKLanguage = true + +pygmentsstyle = "github" + +[markup] + [markup.goldmark] + [markup.goldmark.renderer] + unsafe = true + +[params] +copyright = "© 2018 - 2020 BioCompute. All rights reserved."
+faviconfile = "images/favicon.png" +posts_navigation = true +uselatex = false +highlightjs = false +highlightjslanguages = ["r"] +progressively = false +#google_tag_manager = "UA-79911051-3" + +contact = "object.biocompute@gmail.com" +github = "biocompute-objects" +twitter = "BioComputeObj" +# linkedin = "example" + +[[menu.primary]] +identifier = "home" +name = "Documentation Home" +url = "/" +weight = 1 + +[[menu.primary]] +identifier = "about" +name = "About " +url = "/about" +weight = 2 + +[[menu.primary]] +identifier = "user_guide" +name = "User Guide" +url = "/user_guide" +weight = 3 + +[[menu.primary]] +identifier = "best_practices" +name = "Best Practices" +url = "/best_practices" +weight = 4 + +[[menu.primary]] +identifier = "sop" +name = "Curation SOP" +url = "/sop" +weight = 5 + +[[menu.primary]] +identifier = "events" +name = "News & Events" +url = "/events" +weight = 6 + +[[menu.primary]] +identifier = "biocomputeobject.org" +name = "BioCompute Portal" +url = "http://portal.biochemistry.gwu.edu/" +weight = 7 diff --git a/content/_index.md b/content/_index.md new file mode 100644 index 0000000..a811a1f --- /dev/null +++ b/content/_index.md @@ -0,0 +1,52 @@ +--- +title: "Home" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +### The BioCompute Standard + +Because of the many different ways to organize data, a major goal of the BioCompute project is to build and maintain a formal standard through recognized, accredited standards setting organizations like the Institute of Electrical and Electronics Engineers (IEEE) and the International Organization for Standardization (ISO). A formal, consensus-based standard builds predictability and even more stability into the way in which bioinformatic methods are communicated. + +The standard, officially known as 2791-2020, has two parts: the standards document and the schema, which is maintained in an open source repository: + +- **The current version of the standard can be found [here](https://standards.ieee.org/standard/2791-2020.html)**. +- **The schema can be found [here](https://opensource.ieee.org/2791-object/ieee-2791-schema)**. + +Since the base BioCompute schema is maintained as an open source repository, it can be cloned and integrated into an organization in unique ways, which allows organizations to build off of this schema to create dependent standards for specific applications. This is similar to the different versions of WiFi based on usage, such as the 802.11a standard for fast speed, but high cost and shorter range, or the 802.11b for slower top speed, but lower cost, etc. --- all of which are built on the 802.11 base standard. It can also be used to further extend the schema, such as for handling proprietary, internal content, while still being compatible with the base standard. The open source schema also enables individuals or organizations to suggest changes to be incorporated into future versions of the standard. + +### Citation +This standard was originally prepared by [The BioCompute Object working group](/BCO_Spec_V1.2.md#biocompute-object-consortium-members-bcoc) during preparation for the [2017 HTS Computational Standards for Regulatory Sciences Workshop](https://hive.biochemistry.gwu.edu/htscsrs/workshop_2017).
+ +To reference the BCO standards, please use the following +citation inclusive of the DOI: + +Simonyan, V., Goecks, J., & Mazumder, R. (2017). ***Biocompute Objects — A Step towards Evaluation and Validation of Biomedical Scientific Computations.*** PDA Journal of Pharmaceutical Science and Technology, 71(2), 136–146. doi: [10.5731/pdajpst.2016.006734](http://doi.org/10.5731/pdajpst.2016.006734) + +## Support, Community and Contributing + +To suggest changes to [this repository](#Repository) we welcome contributions as a [pull request](https://github.com/biocompute-objects/BCO_Specification/pulls) or [issue](https://github.com/biocompute-objects/BCO_Specification/issues) submission. + +BCO_Specification is licensed under the [BSD 3-Clause "New" or "Revised" License](./LICENSE) + +>A permissive license similar to the BSD 2-Clause License, but with a 3rd clause that prohibits others from using the name of the project or its contributors to promote derived products without written consent. + +## Mailing List + +As a subscriber to the BCO mailing list, you can post to it by sending a message to biocomputels@hermes.gwu.edu (using the email address that is subscribed). This list is semi-automated and will send your message for review. + +To subscribe or unsubscribe, please visit https://hermes.gwu.edu/cgi-bin/wa?A0=BIOCOMPUTELS and click `Subscribe` or `Unsubscribe` on the lower right. You can also unsubscribe from the list at any time by sending an email to listserv@hermes.gwu.edu, in which the body says: `unsubscribe biocomputels` + +This repository is in support of [2791-2020](https://standards.ieee.org/standard/2791-2020.html) - IEEE Approved Draft Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication.
Please also see our [OSF page](https://osf.io/h59uh/) or our [main page](https://biocomputeobject.org/) \ No newline at end of file diff --git a/content/about.md b/content/about.md new file mode 100644 index 0000000..daad888 --- /dev/null +++ b/content/about.md @@ -0,0 +1,73 @@ +--- +title: "About" +menu: "main" +--- + +
+BioCompute Logo +
+ +
+ +### What is BioCompute? + +Tremendous insights can be found in genome data, and many of these insights are being used to drive personalized medicine. But the hundreds of millions of reads that come from a gene sequencer represent small, nearly random fragments of the genome that's being sequenced, and there are countless ways in which that data can be transformed to yield insights into cancer, ancestry, microbiome dynamics, metagenomics, and many other areas of interest. + +Because there are so many different platforms and so many different scripts and tools to analyze genome data, there is a great need to standardize the way in which these steps are communicated. The more analysis steps and the more complicated a pipeline, the greater the need for a standardized mechanism of communication. The BioCompute standard brings clarity to an analysis, making it clear and reproducible. + +
+ +
+ +
+ +A BioCompute Object (BCO) is an instance of the BioCompute standard, and is a computational record of a bioinformatics pipeline. A BCO is not an analysis, but is a record of which analyses were executed and in exactly which ways. In this way, a BCO acts as an interface for existing standards. A BCO contains all of the necessary information to repeat an entire pipeline from FASTQ to result, and includes additional metadata to identify provenance and usage. + +### WiFi Analogy + +The [802.11 standard](https://en.wikipedia.org/wiki/IEEE_802.11) (more commonly called "WiFi") is a way of standardizing communication between vastly different products on a wireless network. If a product manufacturer wants a product to be able to communicate on a wireless internet network, they can configure the device to use the WiFi standard and it will be able to communicate with most commercial routers, regardless of whether the product is a Mac, a PC, a cell phone, or a smart toaster. + +
+ +
+ +
+ +BioCompute fills a similar need. BioCompute is not an automation or a new programming language; it is a way of collecting and communicating information between two entities. Rather than a laptop and a router, it may be between a pharmaceutical company and the FDA, or between two clinicians, or between a clinician and a researcher. In much the same way that WiFi does not standardize the data that's being transmitted -- allowing you to use Apple's Facetime, Microsoft's Internet Explorer, or your favorite cell phone app -- BioCompute does not standardize the platforms or tools that are used for genome analysis. You continue to use your favorite platforms and tools, whether it's [HIVE](https://hive.biochemistry.gwu.edu/dna.cgi?cmd=main), [Galaxy](https://galaxyproject.org/), [Seven Bridges](https://www.sevenbridges.com/), [DNAnexus](https://www.dnanexus.com/), or others. Also like WiFi, BioCompute can be layered with other privacy or security protocols depending on usage. So clinical trial data can be secured and HIPAA-compliant, while government-funded data sets shared between researchers can be completely open access. + +Because BioCompute acts like an envelope for an entire analysis pipeline, it is compatible with other existing standards, including [FHIR Genomics](https://www.hl7.org/fhir/genomics.html) and [GA4GH](https://www.ga4gh.org/). + +### BioCompute Description + +BioCompute is written in [JavaScript Object Notation (JSON)](https://json.org/example.html), which is simply a set of key:value pairs (meaning that raw files can be read without any knowledge of programming). Information within the BCO is organized into "domains." The domains within a BCO record are Provenance, Usability, Extension, Description, Execution, Input/Output, and Parametric Domains. For more information on the domains, please see the [BioCompute Schema](https://gitlab.com/IEEE-SA/2791/ieee-2791-schema).
+ +BioCompute was built through a collaboration between The George Washington University and the FDA to improve communication of bioinformatics pipelines, and has since been expanded and refined through the participation or collaboration of hundreds of participants from throughout the public and private sectors. While we welcome interest and membership from anyone, most users will fall into one of three categories: + +- [Research Community](/research)
+ The BioCompute standard can help substantially improve replicability, making it possible to repeat a pipeline on a different sample with high fidelity and high confidence. + +- [Clinical Community](/clinical)
+ As BioCompute Objects become tested and validated, they can be applied in the clinic to identify risk factors, flag pharmacogenetic information, and much more. + +- [Pharma, Biotech and Regulatory Pipeline](/regulatory)
+ Protracted communications with the FDA can extend the review process by months. A standardized method of communicating HTS data may help repeat results more quickly and without the need for additional communication. + +Research, clinical, and regulatory groups are key drivers of personalized medicine that is based on next generation sequencing, but there are barriers between these groups. BioCompute reduces these hurdles and brings transparency to the workflow, making it more clear what was done, and clearly delineating expectations for data sharing. The BioCompute specification can be layered with other privacy and security protocols to guard sensitive data, or be made open source depending on the needs of the user. + +The BioCompute project has generated two publications, three workshops, FDA funding, contributions from over 300 participants, and FDA submissions. The project has worked with individuals from NIH, Harvard, several biotech and pharma companies, EMBL-EBI, Galaxy Project, and many more, and can be integrated with any existing standard for HTS data. The project is expected to be both an IEEE and ISO recognized standard within 8-10 months. + +More information about the current BioCompute standard can be found on the [Open Science Framework website](https://osf.io/h59uh/) (where the standard is developed and maintained), the [HIVE](https://hive.biochemistry.gwu.edu/htscsrs/biocompute) website, and the [Research Objects discussion of BioCompute](http://www.researchobject.org/2017-11-27-biocompute-objects/). + +
+ +
+ +
+ + diff --git a/docs/bco-domains.md b/content/bco-domains.md similarity index 75% rename from docs/bco-domains.md rename to content/bco-domains.md index a05c321..67877ba 100644 --- a/docs/bco-domains.md +++ b/content/bco-domains.md @@ -1,4 +1,22 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "BCO Domains" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ # BCO domains @@ -42,7 +60,7 @@ The top level JSON object of a BCO include attributes to define the BCO itself. Definition: -* [Top level fields](top-level.md) +* [Top level fields](/top-level) The remaining top level fields details different _domains_ of the BCO, as listed below. @@ -52,7 +70,7 @@ The `provenance_domain` defines the history, version and status of this BCO as p Definition: -* [Provenance domain](provenance-domain.md) +* [Provenance domain](/provenance-domain) ## 2.2 Usability Domain "usability_domain" @@ -61,7 +79,7 @@ The `usability_domain` is meant to improve searchability by allowing free-text d Definition: -* [Usability domain](usability-domain.md) +* [Usability domain](/usability-domain) ## 2.3 Extension Domain "extension_domain" @@ -70,8 +88,8 @@ The `extension_domain` is a space for a user to add additional structured inform Definitions: -* [fhir extension](extension-fhir.md) -* [scm extension](extension-scm.md) +* [fhir extension](/extension-fhir) +* [scm extension](/extension-scm) ## 2.4 Description Domain "description_domain" @@ -80,7 +98,7 @@ The `description_domain` contain structured field for description of external re Definition: -* [Description domain](description-domain.md) +* [Description domain](/description-domain) ## 2.5 Execution Domain "execution_domain" @@ -88,7 +106,7 @@ The `execution_domain` contains fields for execution of the BCO. 
Definition: -* [Execution domain](execution-domain.md) +* [Execution domain](/execution-domain) ## 2.6 Parametric Domain "parametric_domain" @@ -97,7 +115,7 @@ The `parametric_domain` represents the list of parameters customizing the comput Definition: -* [Parametric domain](parametric-domain.md) +* [Parametric domain](/parametric-domain) ## 2.7 Input and Output Domain "io_domain" @@ -105,7 +123,7 @@ The `io_domain` represents the list of global input and output files created by Definition: -* [Input and Output domain](io-domain.md) +* [Input and Output domain](/io-domain) ## 2.8 Error Domain, acceptable range of variability "error_domain" @@ -114,4 +132,4 @@ The `error_domain` defines the empirical and algorithmic limits an d error sourc Definition: -* [Error domain](error-domain.md) +* [Error domain](/error-domain) diff --git a/content/best_practices.md b/content/best_practices.md new file mode 100644 index 0000000..2b9bebe --- /dev/null +++ b/content/best_practices.md @@ -0,0 +1,69 @@ +--- +title: "BCO Best Practice" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +
+

BioCompute Objects Best Practice

+
+ +---- + +## General +* The required domains are defined by the IEEE . However, a BioCompute Object is considered complete when an Error Domain exists. +* Versioning is allowed, but only if the changes do not affect the workflow or output. BCO versioning follows a minor.patch schema, no major versions are allowed (substantial changes result in a new BCO). Minor changes are things like a change of contact information for a contributor, patch changes are things like spelling and grammar fixes. +* In general, any step that does not transform data does not need to be included in the Description Domain as a formal step, and can be described instead in the Usability Domain. For example, arranging rows and columns in a table, or formatting a figure. Steps that transform data should comprise their own step in the Description Domain. +* The Usability Domain should contain enough information to enable a naïve user generally skilled in bioinformatics to understand the analysis. This means that references to commonly used resources (such as basic Unix commands, well known databases like NCBI, basic terms like “alignment,” etc.) do not need to be explained, but references to less well known resources (such as obscure python packages, etc.) should be described. Description should be tailored to the intended audience, and BCOs intended for public consumption should assume a basic level of bioinformatics proficiency. + +## BioCompute Registry + +The [BioCompute Registry](https://biocomputeobject.org/tst/registry.html) is a domain registry for BCO IDs in which users can register their institution or organization. Similar to a website registry, this will allow the owner of that domain to use any domain organization of their choosing, and prevent naming collisions between groups. For example, the owner of “GW” can build BCOs GW_0001.1, GW01A, GW_, or any other naming system of their preference, and these will not conflict with another registered domain, such as FDA_0001.1, etc. 
The BCO Registry registration numbers may not exceed five characters, and are recommended to be three characters. Any alphanumeric characters are acceptable. + +A BCO may be registered only by the author of the object, and the domain must be approved by the domain holder. Until automated systems are in place, register a BCO by sending the BCO ID and email of the registrant to the [BioCompute Team](mailto:keeneyjg@gwu.edu). The following institutional domains have been reserved: + +* GWU +* FDA +* NIH +* CDC +* NCI + +## Preferred Ontologies + +### Semantic Versioning +BCO versioning should adhere to [semantic versioning](https://semver.org/) to establish how version numbers are assigned and incremented. Given a version number MAJOR.MINOR.PATCH, when versioning a BCO increment the: +1. MAJOR version when you make incompatible API changes, +2. MINOR version when you add functionality in a backwards-compatible manner, and +3. PATCH version when you make backwards-compatible bug fixes. +Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format. + +### PAV Ontology and PROV-O +To preserve the provenance of each BCO, the contribution type of the reviewers and contributors is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the [PROV-O](https://www.w3.org/TR/prov-o/). The following are possible values for the status of an object in the review process: +* `unreviewed` flag indicates that the object has been submitted, but no further evaluation or verification has occurred. +* `in-review` flag indicates that verification is underway. +* `approved` flag indicates that the BCO has been verified and reviewed. +* `suspended` flag indicates an object that was once valid is no longer considered valid. +* `rejected` flag indicates that an error or inconsistency was detected in the BCO, and it has been removed or rejected. 
+ +### Namespace: CURIE +External references field contains a list of the databases and/or ontology IDs that are cross-referenced in the BCO. The external references are used to provide more specificity in the information related to BCO entries. Cross-referenced resources need to be available in the public domain. The external references are stored in the form of prefixed identifiers (CURIEs). These CURIEs map directly to the URIs maintained by identifiers.org. See Section 3.5 for a list of the CURIEs used in this example. + +## General +* The required domains are defined by the IEEE . However, a BioCompute Object is considered complete when an Error Domain exists. +* Versioning is allowed, but only if the changes do not affect the workflow or output. BCO versioning follows a minor.patch schema, no major versions are allowed (substantial changes result in a new BCO). Minor changes are things like a change of contact information for a contributor, patch changes are things like spelling and grammar fixes. +* In general, any step that does not transform data does not need to be included in the Description Domain as a formal step, and can be described instead in the Usability Domain. For example, arranging rows and columns in a table, or formatting a figure. Steps that transform data should comprise their own step in the Description Domain. +* The Usability Domain should contain enough information to enable a naïve user generally skilled in bioinformatics to understand the analysis. This means that references to commonly used resources (such as basic Unix commands, well known databases like NCBI, basic terms like “alignment,” etc.) do not need to be explained, but references to less well known resources (such as obscure python packages, etc.) should be described. Description should be tailored to the intended audience, and BCOs intended for public consumption should assume a basic level of bioinformatics proficiency. 
+ + diff --git a/docs/description-domain.md b/content/description-domain.md similarity index 93% rename from docs/description-domain.md rename to content/description-domain.md index a79da78..04ce250 100644 --- a/docs/description-domain.md +++ b/content/description-domain.md @@ -1,11 +1,29 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Description Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.4 Description Domain "description_domain" -This section defines the fields of the `description_domain` part of the [BCO](bco-domains.md) structure. +This section defines the fields of the `description_domain` part of the [BCO](/bco-domains) structure. Structured field for description of external references, the pipeline steps, and the relationship of I/O objects. Information in this domain is not used for computation. This domain is meant to capture information that is currently being provided in FDA submission in journal format. It is possible that in the future this field can be semi-automatically generated from the execution_domain information. diff --git a/docs/error-domain.md b/content/error-domain.md similarity index 76% rename from docs/error-domain.md rename to content/error-domain.md index 7d5c262..2791122 100644 --- a/docs/error-domain.md +++ b/content/error-domain.md @@ -1,6 +1,24 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Error Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.8 Error Domain, acceptable range of variability "error_domain" @@ -12,7 +30,7 @@ The **algorithmic** subdomain is descriptive of errors that originate by fuzzine For data integration BCOs used to develop knowledgebases, the error domain can, for example, contain rules that determine inclusion in the knowledgebase and reference to data that pass and fail the set of rules. -The possible keys within each subdomains are workflow-specific, free text which should be readable for a human. +The possible keys within each subdomain are workflow-specific, free text which should be readable for a human. ```json "error_domain": { diff --git a/content/events.md b/content/events.md new file mode 100644 index 0000000..f9053cd --- /dev/null +++ b/content/events.md @@ -0,0 +1,140 @@ +--- +title: "Events" +menu: "main" +--- + +
+ +
+ +
+ +Welcome! This is the BioCompute Events page. Workshops are listed on the schedule below. For any questions or comments, or to request a BioCompute Informational Session (15-minute WebEx), click [here](/contact). + +
+ + +
+ +
+ +

Previous Workshops

+

2021

+

Towards Interoperability: Generating BioCompute Objects on Cloud-Based Platforms for Advancing Precision Medicine

+

Register here!

+ +Date: Wednesday July 28 at 11:00AM-12:30PM ET + +Purpose: The purpose of this workshop is to understand the value of interoperability in both research and regulatory review for scientists in the public or private spaces, and especially from the perspective of FDA personnel. As part of the larger goal of smoothing communication between the FDA and private sector to reduce organization burden on both ends, this workshop will first introduce how the cloud computing platform Seven Bridges can package BioCompute Objects (BCOs) for workflow capture and reproducibility. Following the introduction, we will describe several previously observed use cases to solicit feedback on their relevance to attendees, potentially from image processing (for diagnosis), machine-learning (for communicating and exchanging models with training data), and/or multi-modal data applications. Agenda can be found on the registration page. + +Speaker: Dr. Dennis A. Dean, II is a Principal Investigator at Seven Bridges. He manages and builds interdisciplinary teams that develop complex tools and conduct data analyses from conception to deployment/completion. He leads the Translational Science and Analytics Team that includes data scientists, bioinformaticians, and genomic data scientists. He is responsible for the success of his team members across commercial, government, and internal projects. He leads collaborations with the U.S. Food and Drug Administration (FDA), the U.S. Department of Veteran Affairs Million Veteran Program (MVP) and collaborates with large pharmaceutical companies. Dr. Dean trained as a research fellow in medicine at the Harvard Medical School and Brigham and Women’s Hospital in the Program for Sleep Epidemiology and the Program for Sleep and Cardiovascular Medicine. He earned his Ph.D. in biomedical engineering and biotechnology and M.S. in computer science from the University of Massachusetts. He earned his B.S. in computer science from SUNY, Empire State College. + +
+ +

Workflow Preservation and Reproducibility with BCO-RO

+ +Date: Wednesday May 12 at 11:00AM-12:30PM ET + + +Purpose: Training session showing how Research Objects (RO) can package BioCompute Objects (BCO) for Digital Preservation and Reproducibility. Research Objects (RO) are a machine-readable digital preservation effort that aims to package all constituent elements of an analysis together into one archive with very detailed provenance. Here, Stian Soiland-Reyes, a Technical Architect on the Research Objects project, will describe an example that packages the workflow as a descriptive, human-readable report in the form of a BioCompute Object (BCO), and which bundles everything in an RO "Crate." Stian will explain the Research Objects project, and introduce a tutorial for building an RO-BCO archive. RO-BCOs can be efficient solutions for scaling up data analyses, both for internal record keeping and logistics, and for communicating workflows to outside groups. + + +

2020

+ +

Introduction to workflow portability with BCO-CWL

+ +Date: Friday November 20th at 12-2PM ET + + +Purpose: BioCompute Objects (BCOs) were developed to aid in communicating a more thorough understanding of computational analyses. While BCOs can be leveraged for re-execution within the context of specific platforms that have integrated them, they are not used for cross platform implementations. Common Workflow Language (CWL) was developed to assist in the portability of execution, meaning the ability to reproduce a pipeline in a different computational environment. The BCO and CWL teams have partnered over the last year to develop a joint mechanism that enables both portability of execution and strong human- and machine-readable documentation through metadata records. New functionality of BCO-CWL means that a reviewer may be able to independently run a computational pipeline used by a sponsor if using a command line environment, or on a platform that supports CWL. This presentation will go over the project by introducing the concept of portability of execution, the concept of a CWL file, and demonstrate the initial draft of a BCO-CWL implementation. +
+
+

BioCompute Advisory Boards Workshop

+ +Date: Wednesday March 18, 2020 2-4pm ET + +Purpose: The purpose of this workshop is to facilitate dialogue between Advisory Board(s) members on BioCompute applications, vocabulary, and current + future progression of the project through a hands-on approach. These discussions are a means to obtain feedback, introduce potential use-cases, and bring everyone up to speed about BioCompute. +
+
+

BioCompute Workshop for Reviewers: Tool for Communicating Sequencing Analysis

+ +Date: Wednesday June 24, 2020 10am-12pm ET + +Purpose: The purpose of this workshop is to facilitate dialogue and show BCO utility specifically for FDA reviewers. We will be briefly discussing BioCompute applications, vocabulary, current + future progression of the project in addition to a hands-on approach to reviewing a BCO. These discussions are a means to introduce BCO as a tool for submission evaluation and as a mechanism to obtain feedback. + +
    +
  • Slide deck available [here](/docs/ReviewerWorkshop_24June2020_Deck.pdf)
  • +
  • Quick reference guide can be found [here](/docs/BCOCheatSheet.pdf) (PDF)
  • +
  • Post-workshop attendee survey available [here](https://www.surveymonkey.com/r/Q9LXSC6)
  • +
+
+ +

BioCompute Objects: Methods for communicating provenance of data and analysis

+ +
Part of the Biocuration 2020 Online Workshop series
+ +Date: Thursday September 24, 9am PT, 12pm ET, 5pm CET + +Organizers: Charles Hadley King, Raja Mazumder, Jonathon Keeney; George Washington University + +Purpose: Inform about BioCompute Object use and purpose and offer tutelage in the creation and use of BCOs + +
  • Abstract available here
+ +
+ +

2019

+ +

BioCompute Objects: Tools for Communicating NGS Data and Analysis

+ +Date: Tuesday May 14, 2019 + +Organizers: [FDA Center for Biologics Evaluation and Research (CBER)](https://www.fda.gov/about-fda/fda-organization/center-biologics-evaluation-and-research-cber) + +**Purpose:** The BioCompute project has resulted in three prior workshops, two publications, several collaborations, and is currently undergoing formal balloting to become an official IEEE standard. The upcoming Workshop will engage more stakeholders in creating and using BioCompute for NGS and other bioinformatics data analysis communications with the FDA. Specifically, the Workshop will have two components: use case examples, and hands on & demonstrations of new tools that leverage BioCompute. A new Precision FDA-BioCompute Challenge will also be launched at the event. + + + +
+ +

2018

+ +

BioCompute Objects PoC Workshop

+ + +
+ +

2014

+ +

2014 Public Workshop: Next Generation Sequencing Standards

+ + + +
+ + + +
\ No newline at end of file diff --git a/HCV1a.json b/content/examples/HCV1a.json similarity index 96% rename from HCV1a.json rename to content/examples/HCV1a.json index abb6f0a..885d20e 100644 --- a/HCV1a.json +++ b/content/examples/HCV1a.json @@ -1,7 +1,7 @@ { - "object_id": "https://w3id.org/biocompute/1.3.0/examples/HCV1a.json", - "etag": "8e03f6f65e2caae86770a64a9ebef3e0651d4675a96687e5b148fda181c9cdc0", - "spec_version": "https://w3id.org/biocompute/1.3.0/", + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HCV1a.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/", "provenance_domain": { "name": "HCV1a ledipasvir resistance SNP detection", "version": "2.9", @@ -71,7 +71,7 @@ ], "extension_domain": [ { - "extension_schema": "https://github.com/biocompute-objects/BCO_Specification/blob/1.4.0/schemas/extension_domain/fhir_extension.json", + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", "fhir_extension": [ { "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", @@ -102,7 +102,7 @@ ] }, { - "extension_schema": "https://github.com/biocompute-objects/BCO_Specification/blob/1.4.0/schemas/extension_domain/fhir_extension.json", + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", "scm_extension": { "scm_repository": "https://github.com/example/repo1", "scm_type": "git", diff --git a/examples/HIVE_metagenomics.json b/content/examples/HIVE_metagenomics.json similarity index 91% rename from examples/HIVE_metagenomics.json rename to content/examples/HIVE_metagenomics.json index 2761290..ef13b4e 100644 --- a/examples/HIVE_metagenomics.json +++ b/content/examples/HIVE_metagenomics.json @@ -1,7 +1,7 @@ { - "object_id": "https://w3id.org/biocompute/1.3.0/examples/HIVE_metagenomics.json", - 
"checksum": "09d55a44601184330b05dae6c3d49e9c3821f528a93f958edec92504fef41970", - "spec_version" : "https://w3id.org/biocompute/1.3.0/", + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HIVE_metagenomics.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/", "provenance_domain": { "name": "Healthy human fecal metagenomic diversity", "version": "1.0.0", @@ -48,15 +48,18 @@ "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." ], - "extension_domain":{ - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - }, + "extension_domain":[ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], "description_domain": { "keywords": [ "metagenome", diff --git a/examples/LICENSE b/content/examples/LICENSE similarity index 100% rename from examples/LICENSE rename to content/examples/LICENSE diff --git a/examples/README.md 
b/content/examples/README.md similarity index 100% rename from examples/README.md rename to content/examples/README.md diff --git a/examples/UVP.json b/content/examples/UVP.json similarity index 98% rename from examples/UVP.json rename to content/examples/UVP.json index 04bb20d..be6983a 100644 --- a/examples/UVP.json +++ b/content/examples/UVP.json @@ -1,6 +1,6 @@ { "object_id": "https://w3id.org/biocompute/1.3.0/examples/UVP_BCO.json", - "checksum": "e07ad61a2e13272437dc052de7502d8af8a122dc2865ff0f382f459983c0bccc", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", "spec_version": "https://w3id.org/biocompute/1.3.0/", "provenance_domain": { "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", @@ -110,14 +110,17 @@ "usability_domain": [ "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
 ], - "extension_domain": { - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } } - }, + ], "description_domain": { "keywords": [ "Mycobacterium tuberculosis", diff --git a/examples/glycosylation-sites-UniCarbKB.json b/content/examples/glycosylation-sites-UniCarbKB.json similarity index 88% rename from examples/glycosylation-sites-UniCarbKB.json rename to content/examples/glycosylation-sites-UniCarbKB.json index fb482f0..be49e02 100644 --- a/examples/glycosylation-sites-UniCarbKB.json +++ b/content/examples/glycosylation-sites-UniCarbKB.json @@ -1,7 +1,7 @@ { - "object_id": "https://w3id.org/biocompute/1.3.0/examples/glycosylation-sites-UniCarbKB", - "checksum": "c0b41938a47f7b7689a34f5d89707f039e210f3cc22857b80070f73cbcd2e669", - "spec_version" : "https://w3id.org/biocompute/1.3.0/", + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/glycosylation-sites-UniCarbKB.json", + "etag": "5741d66ddf7881db33f7075ce8b64b941bd7cc001965f31682e5da9966c7f3ba", + "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/", "provenance_domain":{ "name": "glycosylation-sites-UniCarbKB", "version": "1.0", @@ -44,18 +44,26 @@ "usability_domain":[ "List of human [taxid:9606] proteins with information on glycosylation sites from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128]" ], - "extension_domain":{ - "license":{ - "data_license": "https://creativecommons.org/licenses/by/4.0/", - 
"scripts_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + "extension_domain":[ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/license/license_extension.json", + "license_extension":{ + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "scripts_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + } }, - "scm_extension":{ - "scm_repository": "https://github.com/GW-HIVE/glygen-backend-integration/", - "scm_type": "git", - "scm_commit": "d34b85553e775dd5452005d786fe6e47d6048ee0", - "scm_path": "/data/projects/glygen/generated/datasets/reviewed/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.readme.txt" + { + + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension":{ + "scm_repository": "https://github.com/GW-HIVE/glygen-backend-integration/", + "scm_type": "git", + "scm_commit": "d34b85553e775dd5452005d786fe6e47d6048ee0", + "scm_path": "/data/projects/glygen/generated/datasets/reviewed/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.readme.txt" + } } - }, + + ], "description_domain":{ "keywords":[ "protein", @@ -214,10 +222,8 @@ }, "error_domain":{ "empirical_error":{ + "comment": "Unique value statistics for the dataset", "statistics":[ - { - "comment": "Unique value statistics for the dataset" - }, { "key": "uniprotkb_canonical_ac", "value":92, @@ -251,7 +257,7 @@ { "key": "glycosylation_type", "value":3, - "description": "Type of glycosylation [linkage type]" + "description": "Type of glycosylation linkage type" } ] }, diff --git a/docs/execution-domain.md b/content/execution-domain.md similarity index 89% rename from docs/execution-domain.md rename to content/execution-domain.md index 350c10d..80b30fb 100644 --- a/docs/execution-domain.md +++ b/content/execution-domain.md @@ -1,10 +1,28 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- 
+title: "Execution Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.5 Execution Domain "execution_domain" -This section defines the `execution_domain` part of the [BCO](bco-domains.md). +This section defines the `execution_domain` part of the [BCO](/bco-domains). The fields required for execution of the BCO have been encapsulated together in order to clearly separate information needed for deployment, software configuration and running applications in a dependent environment. One byproduct of an accurate BCO definition is facilitation of reproducibility as defined by the *Oxford English Dictionary* as "the extent to which consistent results are obtained when produced repeatedly." diff --git a/docs/extension-fhir.md b/content/extension-fhir.md similarity index 83% rename from docs/extension-fhir.md rename to content/extension-fhir.md index 9cd164e..3174fe9 100644 --- a/docs/extension-fhir.md +++ b/content/extension-fhir.md @@ -1,6 +1,24 @@ -_This document is part of the [BioCompute Object specification](bco-specification.md)_ +--- +title: "Extension to External References: SMART on FHIR Genomics" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ### 2.3.1 Extension to External References: SMART on FHIR Genomics diff --git a/docs/extension-scm.md b/content/extension-scm.md similarity index 75% rename from docs/extension-scm.md rename to content/extension-scm.md index 04ee735..b526c83 100644 --- a/docs/extension-scm.md +++ b/content/extension-scm.md @@ -1,6 +1,24 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Extension to External References: Software Configuration Management (SCM)" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ### 2.3.2 Extension to External References: Software Configuration Management (SCM) diff --git a/docs/external-references.md b/content/external-references.md similarity index 83% rename from docs/external-references.md rename to content/external-references.md index 768c9ca..fdd287c 100644 --- a/docs/external-references.md +++ b/content/external-references.md @@ -1,4 +1,24 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "External References" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 3.2 Appendix-II: External reference database list diff --git a/docs/introduction.md b/content/introduction.md similarity index 95% rename from docs/introduction.md rename to content/introduction.md index b60e3c9..b04de17 100644 --- a/docs/introduction.md +++ b/content/introduction.md @@ -1,4 +1,22 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "BCO Introduction" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ # 1 Introduction to BioCompute Objects diff --git a/docs/io-domain.md b/content/io-domain.md similarity index 81% rename from docs/io-domain.md rename to content/io-domain.md index 33bc357..0ff14d2 100644 --- a/docs/io-domain.md +++ b/content/io-domain.md @@ -1,19 +1,37 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "I/O Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.7 Input and Output Domain "io_domain" -This section defines the `io_domain` part of the [BCO](bco-domains.md). +This section defines the `io_domain` part of the [BCO](/bco-domains). This represents the list of global input and output files created by the computational workflow, excluding the intermediate files. These fields are pointers to objects that can reside in the system performing the computation or any other accessible system. Just like the fields of parametric domain, these fields are expected to vary depending on the specific BCO implementation and can refer to named input output arguments of underlying pipelines. Please refer to documentation of individual scripts and specific BCO descriptions for further details. -Condensed exampled: +Condensed example: ```json "io_domain": { - "input_subdomain": { - }, + "input_subdomain": [ + ], "output_subdomain": [ ] }, diff --git a/content/news.md b/content/news.md new file mode 100644 index 0000000..0c5f775 --- /dev/null +++ b/content/news.md @@ -0,0 +1,94 @@ +--- +title: "News" +menu: "main" +--- + +
+ +
+ +
+ +**July 22nd, 2020** + +FDA announces support for BioCompute! The July 22nd, 2020 edition of the Federal Register [announced](https://www.federalregister.gov/documents/2020/07/22/2020-15771/electronic-submissions-data-standards-support-for-the-international-institute-of-electrical-and) that the FDA now supports BioCompute (officially known as IEEE 2791-2020), and that the standard will be added to the [Data Standards Catalog](https://www.fda.gov/industry/fda-resources-data-standards). + +**June 24th, 2020** + +BioCompute training for FDA Reviewers and administrators took place today virtually. The workshop introduced the core concepts of BioCompute, terminology, and walked through usage examples in the context of regulatory submission filings, taken from sample use cases organized with FDA Reviewers. The training was constructed with input from advisory boards, and was focused on receiving a BCO as part of a regulatory filing, rather than technical details. What kinds of information is included in a BCO, where to find relevant information in certain scenarios, and ways to use additional optional fields to include or ask for more information were presented. Supplemental training material like the [quick reference guide](/docs/BCOCheatSheet.pdf) (PDF) was also provided. + +**May 14th, 2020** + +The IEEE Standard, now known as 2791-2020, has [officially published](https://standards.ieee.org/content/ieee-standards/en/standard/2791-2020.html). The standard can be purchased through the IEEE website, and the open source Schema referred to in the standard can be accessed [here](https://opensource.ieee.org/2791-object/ieee-2791-schema). + +**March 18th, 2020** + +Training for FDA personnel begins! The BioCompute team met with FDA reviewers and administrators to discuss the needs of FDA personnel and how best to implement BioCompute training. 
In a series of workshops, tutorials, demonstrations, and other training exercises, the BioCompute team will work with FDA Reviewers and administrators to understand BCOs and interpret information in BCO format, and will discuss the Extension Domains for acquiring extra information. + +**January 30th, 2020** + +The IEEE Standards Association (IEEE SA) has officially approved P2791 for publication. This marks formal acceptance as a standard, and clears the way for publishing. P2791 is one of the first standards to go through the IEEE Open Source Pilot Project, meaning the source for the entire project is open source (currently available [here](https://gitlab.com/IEEE-SA/2791/ieee-2791-schema)). Updates will be posted when the standard is officially published. + +**January 8th, 2020** + +The Review Committee (RevCom) for the Institute of Electrical and Electronics Engineers (IEEE) voted to recommend approving P2791, the proposed standard that embodies the BioCompute specification. The [specification](/specification) is an open source document (viewable [here](https://gitlab.com/IEEE-SA/2791/ieee-2791-schema)) that describes the proposed standard, and the vote to approve is a major milestone in the project's development. The next step is for the Standards Association (IEEE SA) to vote on the proposed standard based on this recommendation. In the event that it is approved, P2791 will become a formal, IEEE recognized standard! + +**October 18th, 2019** + +The BCO Challenge on PrecisionFDA has closed. Reviewers are currently evaluating both the beginner and advanced tracks, and will report their top performers on the [PrecisionFDA website](https://precision.fda.gov/challenges/7/view/results). Congratulations to all who participated, and thank you for helping to build the BioCompute project! + +**September 23rd, 2019** + +We're excited to announce a new program to build a BioComputeDB for the FDA.
The database will be a publicly accessible mechanism to facilitate better scientific communication of workflows with little additional communication, outside of the initial submission. As part of the program, we will gather input regarding needs and use cases from FDA personnel that will be used to build the database, and we will train FDA reviewers and researchers. Initial workshops will begin at the FDA in early 2020. + +**July 14th, 2019** + +BioCompute will be represented at the 52nd annual [Association of Pathology Chairs](https://www.apcprods.org/) Meeting in Boston, MA July 21 - 24. To learn how BioCompute can help support innovation in pathology, please email Jonathon Keeney at keeneyjg@gwu.edu to speak in person, or visit the table near registration, where information will be available. [#APC19Boston](https://twitter.com/hashtag/APC19Boston) + +**May 14th, 2019** + +The [2019 BioCompute Workshop](https://www.fda.gov/vaccines-blood-biologics/workshops-meetings-conferences-biologics/biocompute-objects-tools-communicating-ngs-data-and-analysis-public-workshop-05142019-05152019) was hosted at the FDA on May 14th, 2019. The workshop featured presentations from FDA reviewers, private sector bioinformatics companies and analysis platforms, a patient advocacy representative, and academic researchers, including discussions of integrating other standards like [Common Workflow Language](https://www.commonwl.org/) and [FHIR Genomics](https://www.hl7.org/fhir/genomics.html). The 2019 Workshop also had an outstanding discussion panel that explored the current challenges in communicating NGS analysis pipelines, and how BioCompute might help address those challenges, and kicked off a [BioCompute Challenge on the PrecisionFDA platform](https://precision.fda.gov/challenges/7). 
+ +**March 27th, 2019** + +We're excited to announce that a proposal to expand BioCompute usage through the development of BCOs in a cloud computing environment for Galaxy [has been accepted by the National Science Foundation](https://internet2.edu/cloud/exploring-clouds-for-acceleration-of-science/e-cas-research-projects/)! The proposal will create a library of BCOs describing bioinformatic workflows on Amazon Web Services through the open source platform [Galaxy](https://galaxy.aws.biochemistry.gwu.edu/). The proposal is part of Internet2's cooperative agreement with the National Science Foundation, called [Exploring Clouds for Acceleration of Science Project](https://www.nsf.gov/news/news_summ.jsp?cntn_id=297193). + +**March 7th, 2019** + +The 2019 BioCompute Workshop registration site is live! The United States Food and Drug Administration (FDA) has partnered with the George Washington University [to host](https://www.fda.gov/vaccines-blood-biologics/workshops-meetings-conferences-biologics/biocompute-objects-tools-communicating-ngs-data-and-analysis-public-workshop-05142019-05152019) the 4th BioCompute Workshop. More details can be found [on the FDA's website](https://www.fda.gov/BiologicsBloodVaccines/NewsEvents/WorkshopsMeetingsConferences/ucm632914.htm). + +**February 28th, 2019** + +The IEEE draft specification of P2791 has passed the Mandatory Editorial Coordination (MEC) review and was approved by the [EMB Standards Committee](http://standards.embs.org/). This has moved the draft specification to the ballot phase, and ballot invitations have been sent out. If you would like to participate in the balloting phase, please contact the P2791 Secretary, Jonathon Keeney, at keeneyjg@gwu.edu. The first iteration of the specification is a descriptive standard that will underpin all future, computationally integrated specifications. 
+ +**January 17th, 2019** + +The [PLoS Bio paper](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3000099) describing the BioCompute project [has been featured on the PLoS Open Source Toolkit](https://channels.plos.org/open-source-toolkit)! The Open Source Toolkit is a global forum for open source hardware and software. Its forum helps BioCompute reach a wider audience and cement its status as an invaluable specification for the communication of high throughput sequencing analysis. The paper is currently the first paper in "Featured Research." + +**December 31st, 2018** + +Several members of the BioCompute community, in both the public and private sectors, have joined together to [publish a manuscript](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3000099) describing the BioCompute project. The theory, utility, and implementation of the BioCompute specification are described, along with use cases. + +**December 23rd, 2018** + +The [P2791 Working Group](http://sites.ieee.org/sagroups-2791/) to build an IEEE BioCompute standard has formally voted to move ahead with the process. This means that the IEEE Sponsor will review the document, followed by an official Ballot Group review, and a public comment period. This is exciting news, and a major step for BioCompute. Standardization of BioCompute will create a formal mechanism for creating, commenting on, and using a way to communicate next generation sequencing data in a consensus-driven way. + +**October 1st, 2018** + +In order to explore the possibility of joining the Open Source Pilot Project, the BioCompute IEEE Working Group (P2791) meeting originally scheduled for October 5th has been postponed to October 22nd, 2018, at 1PM Eastern. The meeting will be held by WebEx at that time. This meeting is open to the public. + +**September 19th, 2018** + +2019 BioCompute Workshop announced!
The next BioCompute Workshop will be held on March 25th, 2019 at the FDA's White Oak campus. A Scientific Advisory Board will be formed and convened to discuss the agenda and speakers. + +**September 11th, 2018** + +BioCompute is [presented](https://twitter.com/NeuroGenomics/status/1039643176267669505) at the IEEE Standards Association Workshop: Standards for Digital Data in Ehealth. The workshop, hosted by the [IEEE Standards Association](https://standards.ieee.org/), explored standards in cutting edge technologies being deployed in healthcare. As a mechanism to bridge efforts between researchers, clinicians, and the regulatory pipeline, BioCompute helps to advance personalized medicine by enabling better communication between these key drivers. + +**August 29th, 2018** + +Kickoff meeting for the IEEE Working Group convened! The IEEE Working Group, P2791, has held its first meeting. The meeting was well attended by representatives from the FDA, as well as several universities, biotech, and pharmaceutical enterprises. With a core constituency of voting members established, future Working Group meetings will work to establish Version 1.3 of the [Specification Document](https://github.com/biocompute-objects/BCO_Specification) into a formal IEEE Standard. + +**July 24th, 2018** + +As part of its efforts to host NIH funded data sets through the [STRIDES](https://commonfund.nih.gov/data) program, Google [today announced](https://www.blog.google/products/google-cloud/building-a-global-biomedical-data-ecosystem-with-the-national-institutes-of-health/) that it will use the BioCompute standard to help make these datasets more accessible. BioCompute helps build transparency and reproducibility into work flows in the high throughput sequencing space, and will improve the utility of NIH funded datasets. 
diff --git a/content/parametric-domain.md b/content/parametric-domain.md new file mode 100644 index 0000000..f35a787 --- /dev/null +++ b/content/parametric-domain.md @@ -0,0 +1,35 @@ +--- +title: "Parametric Domain" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ + +## 2.6 Parametric Domain "parametric_domain" + +This represents the list of **NON-default** parameters customizing the computational flow which can affect the output of the calculations. These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation. The `parametric_domain` is not used for running/reproducing a bco e.g. not used by the [`execution_domain`](/execution-domain.md). It is _recommended_ these fields be generated automatically, but that may not always be possible. Please refer to documentation of individual scripts and specific BCO descriptions for details. While this domain is NOT required, it is recommended that it be populated. + +```json +"parametric_domain": [ + {"param": "seed", "value": "14", "step": "1"}, + {"param":"minimum_match_len", "value": "66", "step": "1"}, + {"param": "divergence_threshold_percent", "value": 0.30, "step": "1"}, + {"param": "minimum_coverage", "value": "15", "step": "2"}, + {"param": "freq_cutoff", "value": 0.10, "step": "2"} +] +``` diff --git a/docs/provenance-domain.md b/content/provenance-domain.md similarity index 87% rename from docs/provenance-domain.md rename to content/provenance-domain.md index 1a6ac99..e773af6 100644 --- a/docs/provenance-domain.md +++ b/content/provenance-domain.md @@ -1,10 +1,28 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Provenance Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.1 Provenance Domain "provenance_domain" -This section defines the fields of the `provenance_domain` part of the [BCO](bco-domains.md) structure. +This section defines the fields of the `provenance_domain` part of the [BCO](/bco-domains) structure. Condensed example: @@ -44,10 +62,10 @@ Records the versioning of this BCO instance object. [Semantic Versioning 2.0.0]( >3. PATCH version when you make backwards-compatible bug fixes. >Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format. -BCO versioning should adhere to semantic versioning. +BCO versioning should adhere to semantic versioning. Given the above conditions, a MAJOR version would qualify for a new BCO, and therefore it is RECOMMENDED that the versioning of a BCO only utilize MINOR and PATCH, or two digits. ```json -"version": "2.1.0", +"version": "2.1", ``` ### 2.1.3 Review "review" @@ -61,7 +79,7 @@ The "status" key describes the status of an object in the review process and the * `suspended` flag indicates an object that was once valid is no longer considered valid. * `rejected` flag indicates that an error or inconsistency was detected in the BCO, and it has been removed or rejected. -The fields from the `contributor` object (described in [section 2.1.9](/provenance-domain.md#219-contributors-contributors)) are used to populate the reviewer section. Each BCO MUST have at least one `review`. +The fields from the `contributor` object (described in [section 2.1.9](/provenance-domain.md#219-contributors-contributors)) are used to populate the reviewer section. Each BCO SHOULD have at least one `review`. ```json "review": [ @@ -159,7 +177,7 @@ This is a list to hold contributor identifiers and a description of their type o ### 2.1.10 License "license" -A space for Creative Commons licence or other licence information (text). 
The default or recommended licence can be _Creative Commons Attribution 4.0 International_ identified as https://spdx.org/licenses/CC-BY-4.0.html +A space for Creative Commons licence or other license information (text). The default or recommended licence can be _Creative Commons Attribution 4.0 International_ identified as https://spdx.org/licenses/CC-BY-4.0.html ```json "license": "https://spdx.org/licenses/CC-BY-4.0.html" diff --git a/content/release_protocol.md b/content/release_protocol.md new file mode 100644 index 0000000..4910c26 --- /dev/null +++ b/content/release_protocol.md @@ -0,0 +1,34 @@ +BioCompute Release Protocol +=========================== + + - [x] **Prep** + - ~~[x] Create a release issue: release_1.4.0.~~ + - ~~[x] Set freeze date freeze date [1/24/2020].~~ + - [ ] **Branch Release (on or around freeze date)** + - [ ] Ensure all [blocking milestone issues](https://github.com/biocompute-objects/BCO_Specification/milestones) have been closed. + - [ ] Merge the latest release into dev and push upstream. + - [ ] **Deploy and Test Release** + - [ ] Review issues and ensure they all have a milestones attached. [Link](https://github.com/biocompute-objects/BCO_Specification/milestones) + - [ ] Checkout release branch. + - [ ] Run `schemas/validate.py` on each of the examples in `examples/*`, updating if necessary. + - [ ] Check for obvious missing or inconsistent documentation in release. + - [ ] **Create Release Notes** + - [ ] Open newly edited/created files and manually curate for accuracy. + - [ ] If any changes are made, ensure you commit branch again and document in commit what was changed. + - [ ] Run `sh scripts/log.sh` from the repo root. + - [ ] Check for obvious missing or inconsistent documentation in release notes (`CHANGELOG.md`). + - [ ] Commit branch. + - [ ] **Do Release** + - [ ] Ensure all [blocking milestone issues](https://github.com/biocompute-objects/BCO_Specification/milestones) have been resolved. 
+ - [ ] Create and push new release tag. + - [ ] **Announce Release** + + - [ ] Verify release included in https://github.com/biocompute-objects/BCO_Specification/releases + - [ ] Review announcement on **https://biocomputeobject.org/** + - [ ] Tweet docs news *highlights* link as @BioComputeObj on twitter. [@kee007ney will provide an example in the future](https://twitter.com/BioComputeObj). + - [ ] Email *highlights* to listserv [@kee007ney will provide an example in the future](https://twitter.com/BioComputeObj).. + + - [ ] **Prepare for next release** + - [ ] Close milestone ``${version}`` and ensure milestone ``${next_version}`` exists. + - [ ] Create release issue for next version ``release_${next_version}``. + - [ ] Close this issue. \ No newline at end of file diff --git a/content/sop.md b/content/sop.md new file mode 100644 index 0000000..36b05c3 --- /dev/null +++ b/content/sop.md @@ -0,0 +1,87 @@ +--- +title: "BCO Curation SOP" +menu: "main" +--- + + + + +
+BioCompute Logo +
+ +
+ +# BCO Curation SOP +#### Author: BioCompute Consortium +#### Version: 2.0 +#### Effective Date: Aug 2020 + +_Intended audience: authors and developers_ + +The following recommendations are intended to provide guidance on BCO™ creation, versioning, certification and authentication. + + +## BCO IDs and Versioning + +_Intended Audience: BCO authors_ + +- BioCompute IDs are used as persistent URLs. A novel usability domain must result in the creation of a new BCO with a new BCO ID. BCO IDs are immutable upon creation, and are never deleted or retired. If the usability domain (UD) remains unchanged, this results in a new version of the BCO. BCO ID example: OMX_000001 +- BCO major and minor versions can be incremented based on project/institution documented policies. +- The BioCompute consortium maintains a database of registered authorities. Registered authorities are able to assign their reserved prefixes to their own IDs in the object_id field, such as OMX_000001. We encourage that everyone registers a prefix at biocomputeobject.org. + +## BioCompute Certification(s) and Authentication +_Intended Audience: commercial or academic entities looking for additional BCO support_ + +**Platform certification:** A BioCompute "audit" will be conducted by the BioCompute Consortium. +Requirements include: + +- IEEE-2791 conformant BCOs can be created +- Security (ex: immutable upon creation, secure sharing, platform security) +- Data QC processes on input/output + +**Syntactical certification:** Code is available on GitHub for download and use to ensure standard compliance. + +**Scientific certification:** BCO consortium members will participate in the certification process; each certification process is projected to take ~ 3 months to 1 year for the development of pipelines. Verification Kit: Input+output file(s) (in-silico generated), and Template BCO (tBCO) that includes error domain). 
+ +**Template and Run Authentication:** The Template BCO (tBCO) is created once along with a Verification Kit. The Verification Kit usually includes in silico generated input files, a BCO (with error domain) and output files. Run BCOs (rBCOs) use the tBCOs, and the only changes allowed are in the input (excluding reference files/databases) and output file fields. tBCOs and rBCOs can be authenticated using secure blockchain technology. + +- Template certification requirements: Input + output files +- Run certification requirements: certified template + run BCO (to confirm that parameters and error domain are within range etc.) + +
+BioCompute Logo +

+ +## BCO Metadata +The three metadata fields are filled out at the time of submission. The validity check fills in the `spec_version` with the IEEE URL, offers an option to run a SHA256 (or just input your own hash value) for `etag`, and assigns the `object_id` (with the option to choose from any prefix associated with the account). + + +## Domain-specific guidance +### _Execution domain_ + +When recording manual curation, the `script` field of the `execution_domain` should link to a Google Document or GitHub markdown that describes the steps, either programmatically or in a stepwise fashion. Manual curation steps should ALSO be properly documented in the `description_domain`. An easy way to conceptualize this is: Description domain is for people, Execution domain is for machines (or programmers). + +### _Extension domain_ +Format of how the schema would be defined: [Extension domain](https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.0.0/dataset/dataset_extension.json) + + +### _Error domain_ +This domain can support a “QA/QC rules” subdomain which provides rules such that, if the output file does not pass the appropriate criteria, it is flagged as an error. + +## BCO Form-based portal +_Intended Audience: BCO tool developers and authors_ + +BCOs can be created using any bioinformatics platform that has BCO read and write functionalities. Users who do not have access to a bioinformatics platform can use the [BCO Consortium Editor tool](https://portal.aws.biochemistry.gwu.edu/sign-in) which has some of the basic API functionalities: +- Create a BCO that is conformant to IEEE-2791. +- Upload BCOs in batch mode. The tool runs QA/QC processes on those uploads and creates unique IDs +- Search for existing BCOs by author/title/usability/keywords +- Download and install an instance within an organization’s firewall +- View videos and documentation on tool use + + +This documentation is currently in the comment phase until Sept. 15, 2020. 
Please send your comments to Jonathon Keeney. diff --git a/docs/top-level.md b/content/top-level.md similarity index 71% rename from docs/top-level.md rename to content/top-level.md index 793905b..d025252 100644 --- a/docs/top-level.md +++ b/content/top-level.md @@ -1,6 +1,24 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Top Level Domains" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.0 Top Level Fields @@ -10,7 +28,7 @@ Condensed example: ```json { - "spec_version" : "https://w3id.org/biocompute/1.3.0/", + "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/", "object_id": "https://example.com/bco/9487ae7e-c1aa-4a3c-b18f-3d3695b33ace", "etag": "d41d8cd98f00b204e9800998ecf8427e", "provenance_domain": { @@ -24,7 +42,7 @@ Condensed example: The version of the BCO specification used to define the BCO. It is recomended that this value be a permalink as defined in the [w3id.org/biocompute](https://github.com/perma-id/w3id.org/tree/master/biocompute) repository. ```json -"spec_version": "https://w3id.org/biocompute/1.3.0/" +"spec_version": "https://w3id.org/ieee/ieee-2791-schema/" ``` ### 2.0.2 BioCompute Object Identifier "object_id" @@ -57,5 +75,5 @@ will generate the following: ### Additional domains -Additional description about the BCO itself is also provided in the [provenance domain](provenance-domain.md), [description domain](description-domain.md) and [usability domain](usability-domain.md). Other [domains](bco-domains.md) detail areas like execution and error ranges. +Additional description about the BCO itself is also provided in the [provenance domain](/provenance-domain), [description domain](/description-domain) and [usability domain](/usability-domain). Other [domains](/bco-domains) detail areas like execution and error ranges. diff --git a/docs/usability-domain.md b/content/usability-domain.md similarity index 75% rename from docs/usability-domain.md rename to content/usability-domain.md index b4b54ac..fc5938f 100644 --- a/docs/usability-domain.md +++ b/content/usability-domain.md @@ -1,10 +1,28 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ +--- +title: "Usability Domain" +menu: "main" +--- -_Back to [BCO domains](bco-domains.md)_ + + + +
+BioCompute Logo +
+ +
+ +_This document is part of the [BioCompute Object User Guide](/user_guide)_ + +_Back to [BCO domains](/bco-domains)_ ## 2.2 Usability Domain "usability_domain" -This section defines the `usability_domain` part of the [BCO](bco-domains.md) structure. +This section defines the `usability_domain` part of the [BCO](/bco-domains) structure. This field provides a space for the author to define the usability domain of the BCO. It is an array of free text values that should be consistant with terminology used in the [`name`](provenance_domain.md#2.1.1-name-name), external references ([`xref`](/description-domain.md#242-external-references-xref)), and [`keywords`](/description-domain.md#241-keywords-keywords) sections. The `usability_domain` can accept template language to indicate values from the [`external_references`](https://github.com/biocompute-objects/BCO_Specification/blob/master/external-references.md). The template takes the form of: * `(SNP)[SO:0000694]` diff --git a/docs/user_guide.md b/content/user_guide.md similarity index 67% rename from docs/user_guide.md rename to content/user_guide.md index 2eeba41..502de4f 100644 --- a/docs/user_guide.md +++ b/content/user_guide.md @@ -1,66 +1,95 @@ -GitHub: https://github.com/biocompute-objects - -OSF page: https://osf.io/h59uh/ +--- +title: "User Guide" +menu: "main" +--- -BioCompute Partnership: https://biocomputeobject.org - -# BioCompute Object (BCO) User Guide - -* This version: [draft-1.4.0](https://github.com/biocompute-objects/BCO_Specification/tree/dev) -* Previous version: [v1.3.1](https://github.com/biocompute-objects/BCO_Specification/tree/1.3.1) -* Latest release: https://github.com/biocompute-objects/BCO_Specification/releases/latest -* Latest editor's draft: https://github.com/biocompute-objects/BCO_Specification/tree/dev - + + +
+BioCompute Logo +
-This document was created by the [BioCompute Object Consortium members (BCOC)](#biocompute-object-consortium-members-bcoc) +
-### BCO_Specification is licensed under the [BSD 3-Clause "New" or "Revised" License](./LICENSE) +##### This document was created by the [BioCompute Object Consortium members (BCOC)](#biocompute-object-consortium-members-bcoc) ->A permissive license similar to the BSD 2-Clause License, but with a 3rd clause that prohibits others from using the name of the project or its contributors to promote derived products without written consent. - -# 1 Introduction +# BioCompute Object (BCO) User Guide +> This version: [draft-2.0.0](http://w3id.org/biocompute/2.0.0) +
This version is offered as support for [2791-2020](https://standards.ieee.org/standard/2791-2020.html) - IEEE Approved Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication.
+> Previous version: [1.4.0](http://w3id.org/biocompute/1.4.0)
+> Latest release: https://github.com/biocompute-objects/BCO_Specification/releases/latest
+> Latest editor's draft: https://github.com/biocompute-objects/BCO_Specification/tree/dev
+ +Note that unless you are viewing a [release](https://github.com/biocompute-objects/BCO_Specification/releases) this is a draft subject to change. + +Table of contents: + * [Introduction to BioCompute Objects](/introduction) + * [BCO domains](/bco-domains) + * [Top level fields](/top-level) + * [Provenance domain](/provenance-domain) + * [Usability domain](/usability-domain) + * [FHIR extension](/extension-fhir) + * [SCM extension](/extension-scm) + * [Description domain](/description-domain) + * [Execution domain](/execution-domain) + * [Parametric domain](/parametric-domain) + * [Input and Output domain](/io-domain) + * [Error domain](/error-domain) + * [BCO expanded view example HCV1a.json](/examples/HCV1a.json) + +---- + +## 1 Introduction This document specifies the structure of BioCompute Objects. The specification is split into multiple parts linked to from this top-level document and are maintained in a [GithHub repository](https://github.com/biocompute-objects/BCO_Specification) -where [contributions](/README.md#support-community-and-contributing) are welcome. +where [contributions](/#support-community-and-contributing) are welcome. Read more: -* [Introduction to BioCompute Objects](introduction.md) +* [Introduction to BioCompute Objects](/introduction) -# 2 BioCompute Domains +---- + +## 2 BioCompute Domains BCOs are represented in JSON (JavaScript Object Notation) formatted text, adhearing to [JSON schema draft-07](https://json-schema.org/specification.html). The JSON format was chosen because it is both human and machine readable/writable. For a detailed description of JSON see [www.json.org](http://www.json.org). -BioCompute data types are defined as aggregates of the critical fields organized into the following domains: the provenance domain, the usability domain, the extension domain, the description domain, the execution domain, the parametric domain, the input and output domains, and the error domain. 
At the time of creation with actual values compliant to the schema the BCO should be assigned a unique identifier, a [`object_id`](/docs/top-level.md#202-biocompute-object-identifier-object_id). The object could then be assigned a unique digital [`Etag`](/docs/top-level.md#203-checksum-checksum). +BioCompute data types are defined as aggregates of the critical fields organized into the following domains: the provenance domain, the usability domain, the extension domain, the description domain, the execution domain, the parametric domain, the input and output domains, and the error domain. At the time of creation with actual values compliant to the schema the BCO should be assigned a unique identifier, a [object_id](/top-level#202-biocompute-object-identifier-object_id). The object could then be assigned a unique digital [etag](/top-level#203-etag-etag). + +Three of the domains in a BioCompute Object SHOULD become immutable upon assignment of the digital [etag](/top-level#203-etag-etag): +1) the [Parametric Domain](/parametric-domain) +2) the [Execution Domain](/execution-domain) and +3) the [I/O Domain](/io-domain) -Three of the domains in a BioCompute Object SHOULD become immutable upon assignment of the digital `etag`: -1) the Parametric Domain -2) the Execution Domain and -3) the I/O Domain +* [BCO domains](/bco-domains) -* [BCO domains](bco-domains.md) +---- -# 3 Appendices +## 3 Appendices -## 3.1 Appendix-I: BCO expanded view example +### 3.1 Appendix-I: BCO expanded view example Complete example: -* [HCV1a.json](/HCV1a.json) +* [HCV1a.json](/examples/HCV1a.json) -## 3.2 Appendix-II: External reference database list +### 3.2 Appendix-II: External reference database list CURIEs (short identifiers) like `[taxonomy:31646]` in BCOs can be expanded to complete identifiers. 
Specifications: -* [External references](external-references.md) +* [External references](/external-references) -## 3.3 Title 21 CFR Part 11 +### 3.3 Title 21 CFR Part 11 *Code of Federal Regulations Title 21 Part 11: Electronic Records - Electronic Signatures* @@ -68,17 +97,15 @@ BioCompute project is being developed with Title 21 CFR Part 11 compliance in mi Discussions are now taking place to consider relevance of BioCompute Objects with relation to Title 21 CFR part 11. We encourage continuous input from BioCompute stakeholders on this subject now and while the concept is becoming more mature and more widely accepted by scientific and regulatory communities. -Relevant document link: - -[Part 11: Electronic Records](http://www.fda.gov/RegulatoryInformation/Guidances/ucm125067.htm) +> Relevant document link: [Part 11: Electronic Records](http://www.fda.gov/RegulatoryInformation/Guidances/ucm125067.htm) -## 3.4 Appendix IV - Compatibility +### 3.4 Appendix IV - Compatibility -### 3.4.1 ISA for the experimental metadata +#### 3.4.1 ISA for the experimental metadata ISA is a metadata framework to manage an increasingly diverse set of life science, environmental and biomedical experiments that employ one or a combination of technologies. Built around the **Investigation** (the project context), **Study** (a unit of research) and **Assay** (analytical measurements) concepts, ISA helps to provide rich descriptions of experimental metadata (i.e. sample characteristics, technology and measurement types, sample-to-data relationships) so that the resulting data and discoveries are reproducible and reusable. 
The ISA Model and Serialization Specifications define an Abstract Model of the metadata framework that has been implemented in two format specifications, ISA-Tab and ISA-JSON ([http://isa-tools.org/format/specification](http://isa-tools.org/format/specification)), both of which have supporting tools and services associated with them, including by a programmable Python AP ([http://isa-tools.org](http://isa-tools.org/format/specification)) and a varied user community and contributors ([http://www.isacommons.org](http://www.isacommons.org)). ISA focuses on structuring experimental metadata; raw and derived data files, codes, workflows etc are considered as external file that are referenced. An example, along its complementarity with other models and a computational workflow is illustrated in this paper, which shows how to explicitly declare elements of experimental design, variables, and findings: [http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0127612](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0127612) -## 3.5 Appendix VI Acknowledgements +### 3.5 Appendix VI Acknowledgements This document began development during the [2017 HTS-CSRS workshop](https://hive.biochemistry.gwu.edu/htscsrs/workshop_2017). The discussion during the workshop led to the refinement and completion of this document. The workshop participants were a major part of the initial BCO community, and the comments and suggestions collected during the sessions were incorporated into this document. The people who participated in the 2017 workshop, and therefore made significant contributions are listed here: [https://osf.io/h59uh/](https://osf.io/h59uh/) @@ -91,7 +118,3 @@ Kahsay **Documentation Community:** Gil Alterovitz (Boston Children’s Hospital/Harvard Medical School, SMART/FHIR/HL7, GA4GH), Michael R. 
Crusoe (CWL), Marco Schito (C-Path), Konstantinos Krampis (CUNY), Alexander (Sasha) Wait Zaranek (Curoverse), John Quackenbush (DFCI/Harvard), Geet Duggal (DNAnexus), Singer Ma (DNAnexus), Yuching Lai (DDL), Warren Kibbe (Duke), Tony, Burdett (EBI), Helen Parkinson (EBI), Stuart Young (Engility Corp), Anupama Joshi (Epinomics), Vineeta Agarwala (Flatiron Health), James Hirmas (GenomeNext), David Steinberg (UCSC), Veronica Miller (HIV Forum), Dan Taylor (Internet 2), Paul Duncan (Merck), Jianchao Yao (Merck & Co., Inc., Boston, MA USA), Marilyn Matz (Paradigm4), Ben Busby (NCBI), Eugene Yaschenko (NCBI), Zhining Wang (NCI), Hsinyi (Steve) Tsang (NCI), Durga Addepalli (NCI/Attain), Heidi Sofia (NIH), Scott Jackson (NIST), Paul Walsh (NSilico Life Science), Toby Bloom (NYGC), Hiroki Morizono (CNMC), Jeremy Goecks (Oregon Health and Science University), Srikanth Gottipati (Otsuka-US), Alex Poliakov (Paradigm4), Keith Nangle (Pistoia Alliance), Jonas S Almeida (Stony Brook Univ, SUNY), Dennis A. Dean, II (Seven Bridges Genomics), Dustin Holloway (Seven Bridges Genomics), Nisha Agarwal (Solvuu), Stian Soiland-Reyes (UNIMAN), Carole Goble (UNIMAN), Susanna-Assunta Sansone (University of Oxford), Philippe Rocca-Serra (University of Oxford), Phil Bourne (Univ. of Virginia), Joseph Nooraga (Fred Hutchinson Cancer Research Center) - ------- - -Contact: Raja Mazumder (mazumder@gwu.edu) and Vahan Simonyan (vahansim@gmail.com) diff --git a/docs/parametric-domain.md b/docs/parametric-domain.md deleted file mode 100644 index 2b62e11..0000000 --- a/docs/parametric-domain.md +++ /dev/null @@ -1,17 +0,0 @@ -_This document is part of the [BioCompute Object User Guide](user_guide.md)_ - -_Back to [BCO domains](bco-domains.md)_ - -## 2.6 Parametric Domain "parametric_domain" - -This represents the list of **NON-default** parameters customizing the computational flow which can affect the output of the calculations. 
These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation. The `parametric_domain` is not used for running/reproducing a bco e.g. not used by the [`execution_domain`](/execution-domain.md). It is _recommended_ these feilds be generated automatically, but that may not allways be possible. Please refer to documentation of individual scripts and specific BCO descriptions for details. While this domain is NOT required, it is reccommended that it be populated. - -```json -"parametric_domain": [ - {"param": "seed", "value": "14", "step": "1"}, - {"param":"minimum_match_len", "value": "66", "step": "1"}, - {"param": "divergence_threshold_percent", "value": 0.30, "step": "1"}, - {"param": "minimum_coverage", "value": "15", "step": "2"}, - {"param": "freq_cutoff", "value": 0.10, "step": "2"} -] -``` diff --git a/docs/release_protocol.md b/docs/release_protocol.md deleted file mode 100644 index e75cea9..0000000 --- a/docs/release_protocol.md +++ /dev/null @@ -1,20 +0,0 @@ -BioCompute Release Protocol -=========================== - -**Major (compatibility affected) Software Releases** - -This the protocol for releasing updates to the [master branch](https://github.com/biocompute-objects/BCO_Specification/tree/master) when the software updates affect the compatibility of previous releases. - -1. Standard system tests – As the changes and bugs are fixed in the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev), the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev) needs to be tested as the new changes are implemented on a piecemeal basis. -2. Create release notes – Create a list of the particular issues (software changes) to be included in this software release and draft a “Release Notes” document. This “Release Notes” document should include the software version number (e.g. 
3.1.0) and contain a list of each change with the comit hash, title, and brief description of the change from a user perspective. -5. Schedule release date – Schedule a date at least 10 days in the future. This buffer will allow time for further testing. - -**Minor (compatibility not affected) Software Releases** - -This the protocol for releasing updates to the [master branch](https://github.com/biocompute-objects/BCO_Specification/tree/master) when the update changes don’t affect the compatibility of previous releases. - -1. Standard system tests – As the changes and bugs are fixed in the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev), the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev) needs to be tested as the new changes are implemented on a piecemeal basis. -2. Create release notes – Create a list of the particular issues (software changes) to be included in this software release and draft a “Release Notes” document. This “Release Notes” document should include the software version number (e.g. 3.01) and contain a list of each change with the Unfuddle ticket number, title, and brief description of the change from a user perspective.cx -4. Final round of Quality Assurance testing – After the release date is scheduled, a final round of QA testing needs to occur in the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev) (all changes should be in the [dev branch](https://github.com/biocompute-objects/BCO_Specification/tree/dev) and already have gone through numbers rounds of bug testing) to ensure a bug is not being introduced into the production environment. -5. Promote changes to Production – Once a full round of QA testing is successful without any new bugs, the changes are ready to be promoted to production. 
- diff --git a/ieee-2791-schema/.gitignore b/ieee-2791-schema/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/ieee-2791-schema/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/schemas/biocomputeobject.json b/ieee-2791-schema/2791object.json old mode 100755 new mode 100644 similarity index 87% rename from schemas/biocomputeobject.json rename to ieee-2791-schema/2791object.json index eeb5c3b..1fe4e01 --- a/schemas/biocomputeobject.json +++ b/ieee-2791-schema/2791object.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.4.0/schemas/biocomputeobject.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", "type": "object", - "title": "Base type for all BioCompute Objects", - "description": "All BioCompute object types must adhear to this type in order to be compliant with BioCompute specification", + "title": "Base type for all IEEE-2791 Objects", + "description": "All IEEE-2791 object types must adhear to this type in order to be compliant with IEEE-2791 standard", "required": [ "object_id", "spec_version", @@ -17,14 +17,11 @@ "definitions": { "object_id": { "type": "string", - "description": "A unique identifier that should be applied to each BCO instance. IDs should never be reused", - "examples": [ - "https://w3id.org/biocompute/1.4.0/HCV1a.json" - ] + "description": "A unique identifier that should be applied to each IEEE-2791 Object instance, generated and assigned by a IEEE-2791 database engine. 
IDs should never be reused" }, "uri": { "type": "object", - "description": "A Uniform Resource Identifer", + "description": "Any of the four Resource Identifiers defined at https://tools.ietf.org/html/draft-handrews-json-schema-validation-01#section-7.3.5", "additionalProperties": false, "required": [ "uri" @@ -39,11 +36,12 @@ }, "access_time": { "type": "string", + "description": "Time stamp of when the request for this data was submitted", "format": "date-time" }, "sha1_checksum": { "type": "string", - "description": "value of sha1 checksum of file", + "description": "output of hash function that produces a message digest", "pattern": "[A-Za-z0-9]+" } } @@ -121,9 +119,9 @@ }, "spec_version": { "type": "string", - "description": "Version of the BCO specification used to define this document", + "description": "Version of the IEEE-2791 specification used to define this document", "examples": [ - "https://w3id.org/biocompute/spec/v1.2" + "https://w3id.org/ieee/ieee-2791-schema/" ], "readOnly": true, "format": "uri" @@ -177,4 +175,4 @@ "$ref": "error_domain.json" } } -} \ No newline at end of file +} diff --git a/ieee-2791-schema/AUTHORS b/ieee-2791-schema/AUTHORS new file mode 100644 index 0000000..6395853 --- /dev/null +++ b/ieee-2791-schema/AUTHORS @@ -0,0 +1,23 @@ +# IEEE 2791 Object Schema Authors +# +# This does not necessarily list everyone who has contributed code, +# since in some cases, their employer may be the copyright holder. +# To see the full list of contributors, see the file CONTRIBUTORS. 
+ +The Translational Genomics Research Institute +Gil Alterovitz +Michael Crusoe +Jeremy Goecks +John Quackenbush +Marco Schito +Hiroki Morizono +Paul Walsh +Hadley King +Dennis Dean II +Stian Soiland-Reyes +Raja Mazumder +Jonal Almeida +Carole Goble +Joseph Sayed Nooraga +Janisha Patel +Robel Kahsay diff --git a/ieee-2791-schema/CONTRIBUTORS b/ieee-2791-schema/CONTRIBUTORS new file mode 100644 index 0000000..2298ddf --- /dev/null +++ b/ieee-2791-schema/CONTRIBUTORS @@ -0,0 +1,24 @@ +# This is the list of IEEE 2791 Object Schema contributors +# +# This does not necessarily list the copyright holders, since in some +# cases, an employer may be the copyright holder. To see the full +# list of copyright holders, see the file AUTHORS + +Jason Travis +Gil Alterovitz +Michael Crusoe +Jeremy Goecks +John Quackenbush +Marco Schito +Hiroki Morizono +Paul Walsh +Hadley King +Dennis Dean II +Stian Soiland-Reyes +Raja Mazumder +Jonal Almeida +Carole Goble +Joseph Sayed Nooraga +Janisha Patel +Robel Kahsay + diff --git a/ieee-2791-schema/LICENSE b/ieee-2791-schema/LICENSE new file mode 100644 index 0000000..00f0c18 --- /dev/null +++ b/ieee-2791-schema/LICENSE @@ -0,0 +1,30 @@ +Copyright 2020 IEEE 2791 Object Schema Authors + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SPDX-License-Identifier: BSD-3-Clause diff --git a/ieee-2791-schema/README.md b/ieee-2791-schema/README.md new file mode 100644 index 0000000..b18f08f --- /dev/null +++ b/ieee-2791-schema/README.md @@ -0,0 +1,51 @@ +# IEEE 2791 Object Schema + +IEEE 2791 Object Schema is intended for use as part of IEEE 2791 Std., +*Standard for Bioinformatics Computations and Analyses Generated by +High-Throughput Sequencing (HTS) to Facilitate Communication.* + +IEEE Std 2791™-2020 normatively references Version 1.4 of ieee-2791-schema. + + + +## License + +All source files (.json files) in this repository are subject to the +following copyright and licensing terms. + +> Copyright 2020 IEEE 2791 Object Schema Authors +> +> Redistribution and use in source and binary forms, with or without +> modification, are permitted provided that the following conditions are +> met: +> +> * Redistributions of source code must retain the above copyright +> notice, this list of conditions and the following disclaimer. +> +> * Redistributions in binary form must reproduce the above copyright +> notice, this list of conditions and the following disclaimer in the +> documentation and/or other materials provided with the distribution. 
+> +> * Neither the name of the copyright holder nor the names of its +> contributors may be used to endorse or promote products derived from +> this software without specific prior written permission. +> +> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +> "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +> LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +> A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +> HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +> SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +> LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +> DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +> THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +> (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +> OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +> +> SPDX-License-Identifier: BSD-3-Clause + + + +See the LICENSE file distributed with this work for copyright and +licensing information, the AUTHORS file for a list of copyright +holders, and the CONTRIBUTORS file for the list of contributors. 
diff --git a/schemas/description_domain.json b/ieee-2791-schema/description_domain.json old mode 100755 new mode 100644 similarity index 91% rename from schemas/description_domain.json rename to ieee-2791-schema/description_domain.json index 9d763c4..f22610e --- a/schemas/description_domain.json +++ b/ieee-2791-schema/description_domain.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/description_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/description_domain.json", "type": "object", "title": "Description Domain", "description": "Structured field for description of external references, the pipeline steps, and the relationship of I/O objects.", @@ -26,7 +26,7 @@ }, "xref": { "type": "array", - "description": "List of the databases or ontology IDs that are cross-referenced in the BCO.", + "description": "List of the databases or ontology IDs that are cross-referenced in the IEEE-2791 Object.", "items": { "type": "object", "description": "External references are stored in the form of prefixed identifiers (CURIEs). 
These CURIEs map directly to the URIs maintained by Identifiers.org.", @@ -73,7 +73,7 @@ }, "platform": { "type": "array", - "description": "reference to a particular deployment of an existing platform where this BCO can be reproduced.", + "description": "reference to a particular deployment of an existing platform where this IEEE-2791 Object can be reproduced.", "items": { "type": "string", "examples": [ @@ -101,7 +101,7 @@ }, "name": { "type": "string", - "description": "This is the common name of the software tool", + "description": "This is a recognized name of the software tool", "examples": [ "HIVE-hexagon" ] @@ -115,7 +115,7 @@ }, "version": { "type": "string", - "description": "Version assigned to the instance of the tool used.", + "description": "Version assigned to the instance of the tool used corresponding to the upstream release.", "examples": [ "1.3" ] @@ -139,7 +139,7 @@ ] }, "uri": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } @@ -148,14 +148,14 @@ "type": "array", "description": "URIs (expressed as a URN or URL) of the input files for each tool.", "items": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } }, "output_list": { "type": "array", "description": "URIs (expressed as a URN or URL) of the output files for each tool.", "items": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } diff --git a/schemas/error_domain.json b/ieee-2791-schema/error_domain.json similarity index 69% rename from schemas/error_domain.json rename to ieee-2791-schema/error_domain.json index aec9a8a..c0be62b 100644 --- a/schemas/error_domain.json +++ b/ieee-2791-schema/error_domain.json @@ -1,9 +1,10 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/error_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/error_domain.json", "type": 
"object", "title": "Error Domain", - "description": "", + "description": "Fields in the Error Domain are open-ended and not restricted nor defined by the IEEE-2791 standard. It is RECOMMENDED that the keys directly under empirical_error and algorithmic_error use a full URI. Resolving the URI SHOULD give a JSON Schema or textual definition of the field. Other keys are not allowed error_domain", + "additionalProperties": false, "required": [ "empirical_error", "algorithmic_error" @@ -20,5 +21,4 @@ "description": "descriptive of errors that originate by fuzziness of the algorithms, driven by stochastic processes, in dynamically parallelized multi-threaded executions, or in machine learning methodologies where the state of the machine can affect the outcome." } } - -} \ No newline at end of file +} diff --git a/schemas/execution_domain.json b/ieee-2791-schema/execution_domain.json old mode 100755 new mode 100644 similarity index 80% rename from schemas/execution_domain.json rename to ieee-2791-schema/execution_domain.json index 97c7d0e..858cad2 --- a/schemas/execution_domain.json +++ b/ieee-2791-schema/execution_domain.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/execution_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/execution_domain.json", "type": "object", "title": "Execution Domain", - "description": "The fields required for execution of the BCO are herein encapsulated together in order to clearly separate information needed for deployment, software configuration, and running applications in a dependent environment", + "description": "The fields required for execution of the IEEE-2791 Object are herein encapsulated together in order to clearly separate information needed for deployment, software configuration, and running applications in a dependent environment", "required": [ "script", "script_driver", @@ -15,19 +15,19 @@ "properties": { "script": { "type": 
"array", - "description": "points to internal or external references to a script object that was used to perform computations for this BCO instance.", + "description": "points to a script object or objects that was used to perform computations for this IEEE-2791 Object instance.", "items": { "additionalProperties": false, "properties": { "uri": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } }, "script_driver": { "type": "string", - "description": "Specification of the kind of executable that can be launched in order to perform a sequence of commands described in the script in order to run the pipelin", + "description": "Indication of the kind of executable that can be launched in order to perform a sequence of commands described in the script in order to run the pipelin", "examples": [ "hive", "cwl-runner", @@ -36,7 +36,7 @@ }, "software_prerequisites": { "type": "array", - "description": "Minimal necessary prerequisites, library, tool versions needed to successfully run the script to produce BCO.", + "description": "Minimal necessary prerequisites, library, tool versions needed to successfully run the script to produce this IEEE-2791 Object.", "items": { "type": "object", "description": "A necessary prerequisite, library, or tool version.", @@ -62,14 +62,14 @@ ] }, "uri": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } }, "external_data_endpoints": { "type": "array", - "description": "Minimal necessary domain-specific external data source access in order to successfully run the script to produce BCO.", + "description": "Minimal necessary domain-specific external data source access in order to successfully run the script to produce this IEEE-2791 Object.", "items": { "type": "object", "description": "Requirement for network protocol endpoints used by a pipeline’s scripts, or other software.", diff --git a/schemas/io_domain.json 
b/ieee-2791-schema/io_domain.json old mode 100755 new mode 100644 similarity index 79% rename from schemas/io_domain.json rename to ieee-2791-schema/io_domain.json index 2a01cb9..c460e57 --- a/schemas/io_domain.json +++ b/ieee-2791-schema/io_domain.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/io_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/io_domain.json", "type": "object", "title": "Input and Output Domain", - "description": "The list of global input and output files created by the computational workflow, excluding the intermediate files. Custom to every specific BCO implementation, these fields are pointers to objects that can reside in the system performing the computation or any other accessible system.", + "description": "The list of global input and output files created by the computational workflow, excluding the intermediate files. Custom to every specific IEEE-2791 Object implementation, these fields are pointers to objects that can reside in the system performing the computation or any other accessible system.", "required": [ "input_subdomain", "output_subdomain" @@ -21,7 +21,7 @@ ], "properties": { "uri": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } @@ -42,13 +42,14 @@ "type": "string", "title": "mediatype", "description": "https://www.iana.org/assignments/media-types/", + "default": "application/octet-stream", "examples": [ "text/csv" ], "pattern": "^(.*)$" }, "uri": { - "$ref": "biocomputeobject.json#/definitions/uri" + "$ref": "2791object.json#/definitions/uri" } } } diff --git a/schemas/parametric_domain.json b/ieee-2791-schema/parametric_domain.json old mode 100755 new mode 100644 similarity index 88% rename from schemas/parametric_domain.json rename to ieee-2791-schema/parametric_domain.json index 2c64400..cde0644 --- a/schemas/parametric_domain.json +++ 
b/ieee-2791-schema/parametric_domain.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/parametric_domain", + "$id": "https://w3id.org/ieee/ieee-2791-schema/parametric_domain.json", "type": "array", "title": "Parametric Domain", "description": "This represents the list of NON-default parameters customizing the computational flow which can affect the output of the calculations. These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation", @@ -18,8 +18,7 @@ "description": "Specific variables for the computational workflow", "examples": [ "seed" - ], - "pattern": "^(.*)$" + ] }, "value": { "type": "string", @@ -27,8 +26,7 @@ "title": "value", "examples": [ "14" - ], - "pattern": "^(.*)$" + ] }, "step": { "type": "string", @@ -41,4 +39,4 @@ } } } -} \ No newline at end of file +} diff --git a/schemas/provenance_domain.json b/ieee-2791-schema/provenance_domain.json old mode 100755 new mode 100644 similarity index 73% rename from schemas/provenance_domain.json rename to ieee-2791-schema/provenance_domain.json index 0578154..0c1aa5a --- a/schemas/provenance_domain.json +++ b/ieee-2791-schema/provenance_domain.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/provenance_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/provenance_domain.json", "type": "object", "title": "Provenance Domain", "description": "Structured field for tracking data through transformations, including contributors, reviewers, and versioning.", @@ -16,22 +16,22 @@ "properties": { "name": { "type": "string", - "description": "Public searchable name for BioCompute Object. 
This public field should take free text value using common biological research terminology supporting the terminology used in the usability_domain, external references (xref), and keywords sections.", + "description": "Public searchable name for IEEE-2791 Object. This public field should take free text value using common biological research terminology supporting the terminology used in the usability_domain, external references (xref), and keywords sections.", "examples": [ "HCV1a ledipasvir resistance SNP detection" ] }, "version": { "type": "string", - "description": "Records the versioning of this BCO instance object. BioCompute Object Version should adhere to semantic versioning as recommended by Semantic Versioning 2.0.0.", - "reference": "https://semver.org/", + "description": "Records the versioning of this IEEE-2791 Object instance. IEEE-2791 Object Version should adhere to semantic versioning as recommended by Semantic Versioning 2.0.0.", + "reference": "https://semver.org/spec/v2.0.0.html", "examples": [ "2.9" ] }, "review": { "type": "array", - "description": "Description of the current verification status of an object in the review process. The unreviewed flag indicates that the object has been submitted, but no further evaluation or verification has occurred. The in-review flag indicates that verification is underway. The approved flag indicates that the BCO has been verified and reviewed. The suspended flag indicates an object that was once valid is no longer considered valid. The rejected flag indicates that an error or inconsistency was detected in the BCO, and it has been removed or rejected. The fields from the contributor object (described in section 2.1.10) is inherited to populate the reviewer section.", + "description": "Description of the current verification status of an object in the review process. The unreviewed flag indicates that the object has been submitted, but no further evaluation or verification has occurred. 
The in-review flag indicates that verification is underway. The approved flag indicates that the IEEE-2791 Object has been verified and reviewed. The suspended flag indicates an object that was once valid is no longer considered valid. The rejected flag indicates that an error or inconsistency was detected in the IEEE-2791 Object, and it has been removed or rejected. The fields from the contributor object (described in section 2.1.10) is inherited to populate the reviewer section.", "items": { "type": "object", "required": [ @@ -45,14 +45,14 @@ "format": "date-time" }, "reviewer": { - "$ref": "biocomputeobject.json#/definitions/contributor", - "description": "Contributer that assigns BCO review status." + "$ref": "2791object.json#/definitions/contributor", + "description": "Contributer that assigns IEEE-2791 review status." }, "reviewer_comment": { "type": "string", "description": "Optional free text comment by reviewer", "examples": [ - "Approved by GW staff. Waiting for approval from FDA Reviewer" + "Approved by research institution staff. 
Waiting for approval from regulator" ] }, "status": { @@ -64,15 +64,15 @@ "rejected", "suspended" ], - "description": "Current verification status of the BioCompute Object", + "description": "Current verification status of the IEEE-2791 Object", "default": "unreviewed" } } } }, "derived_from": { - "description": "value of `object_id` field of another BioCompute object", - "$ref": "biocomputeobject.json#/definitions/object_id" + "description": "value of `ieee2791_id` field of another IEEE-2791 that this object is partially or fully derived from", + "$ref": "2791object.json#/definitions/object_id" }, "obsolete_after": { "type": "string", @@ -98,13 +98,13 @@ }, "created": { "type": "string", - "description": "Date and time of the BioCompute Object creation", + "description": "Date and time of the IEEE-2791 Object creation", "readOnly": true, "format": "date-time" }, "modified": { "type": "string", - "description": "Date and time the BioCompute Object was last modified", + "description": "Date and time the IEEE-2791 Object was last modified", "readOnly": true, "format": "date-time" }, @@ -112,7 +112,7 @@ "type": "array", "description": "This is a list to hold contributor identifiers and a description of their type of contribution, including a field for ORCIDs to record author information, as they allow for the author to curate their information after submission. 
The contribution type is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the PROV-O.", "items": { - "$ref": "biocomputeobject.json#/definitions/contributor" + "$ref": "2791object.json#/definitions/contributor" } }, "license": { diff --git a/schemas/usability_domain.json b/ieee-2791-schema/usability_domain.json old mode 100755 new mode 100644 similarity index 51% rename from schemas/usability_domain.json rename to ieee-2791-schema/usability_domain.json index e99c712..54e936e --- a/schemas/usability_domain.json +++ b/ieee-2791-schema/usability_domain.json @@ -1,17 +1,16 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/usability_domain.json", + "$id": "https://w3id.org/ieee/ieee-2791-schema/usability_domain.json", "type": "array", "title": "Usability Domain", - "description": "Author-defined usability domain of the BCO. This field is to aid in search-ability and provide a specific description of the function of the object. It is recommended that a novel use of the BCO could result in the creation of a new entry with a new usability domain", + "description": "Author-defined usability domain of the IEEE-2791 Object. 
This field is to aid in search-ability and provide a specific description of the function of the object.", "items": { "type": "string", - "description": "Free text values that can accept template language to indicate values from the external_references", + "description": "Free text values that can be used to provide scientific reasoning and purpose for the experiment", "examples": [ "Identify baseline single nucleotide polymorphisms SNPs [SO:0000694], insertions [so:SO:0000667], and deletions [so:SO:0000045] that correlate with reduced ledipasvir [pubchem.compound:67505836] antiviral drug efficacy in Hepatitis C virus subtype 1 [taxonomy:31646]", "Identify treatment emergent amino acid substitutions [so:SO:0000048] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid substitutions [so:SO:0000048] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + "Determine whether the treatment emergent amino acid substitutions [so:SO:0000048] identified correlate with treatment failure involving other drugs against the same virus" ] } -} \ No newline at end of file +} diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 0000000..2709c4b --- /dev/null +++ b/netlify.toml @@ -0,0 +1,6 @@ +[build] +publish = "public" +command = "hugo --gc --minify" + +[context.production.environment] +HUGO_VERSION = "0.76.5" diff --git a/public/404.html b/public/404.html new file mode 100644 index 0000000..b300993 --- /dev/null +++ b/public/404.html @@ -0,0 +1,73 @@ + + + + + + + 404 Page not found - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

404

+

page not found

+

back to the homepage

+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/_redirects b/public/_redirects new file mode 100644 index 0000000..e69de29 diff --git a/public/bco-domains/index.html b/public/bco-domains/index.html new file mode 100644 index 0000000..deadf14 --- /dev/null +++ b/public/bco-domains/index.html @@ -0,0 +1,273 @@ + + + + + + + BCO Domains - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

BCO domains

+

A BCO JSON object is split into different parts, or domains, detailed below.

+

Condensed example:

+
{
+    "spec_version" : "https://w3id.org/biocompute/1.3.0/",
+    "object_id": "https://example.com/bco/9487ae7e-c1aa-4a3c-b18f-3d3695b33ace",
+    "type": "antiviral_resistance_detection", 
+    "etag": "584C7FE128717E1712426AB19CAAEA8BC1E27365B54285BBEA1221284C7D3A48",
+    "provenance_domain": {
+    },
+    "usability_domain": [
+    ],
+    "extension_domain":{
+        "fhir_extension": [
+        ],
+        "scm_extension": {
+        }
+    },
+    "description_domain": {
+    },
+    "execution_domain": {
+    }, 
+    "parametric_domain": {
+    }, 
+    "io_domain": {
+    }, 
+    "error_domain": {
+    }
+}
+

2.0 Top Level Fields

+

The top level JSON object of a BCO includes attributes to define the BCO itself.

+

Definition:

+ +

The remaining top level fields detail the different domains of the BCO, as listed below.

+

2.1 Provenance Domain “provenance_domain”

+

The provenance_domain defines the history, version and status of this BCO as part of the review process.

+

Definition:

+ +

2.2 Usability Domain “usability_domain”

+

The usability_domain is meant to improve searchability by allowing free-text description of the BCO.

+

Definition:

+ +

2.3 Extension Domain “extension_domain”

+

The extension_domain is a space for a user to add additional structured information that is not defined in the BioCompute schema. The extension_domain section is not evaluated by checks for BCO validity or computational correctness and as such is the place to add ANY type of additional structured information. We provide two examples that are neither exclusive nor exhaustive.

+

Definitions:

+ +

2.4 Description Domain “description_domain”

+

The description_domain contains structured fields for description of external references, the pipeline steps, and the relationship of I/O objects.

+

Definition:

+ +

2.5 Execution Domain “execution_domain”

+

The execution_domain contains fields for execution of the BCO.

+

Definition:

+ +

2.6 Parametric Domain “parametric_domain”

+

The parametric_domain represents the list of parameters customizing the computational flow which can affect the output of the calculations.

+

Definition:

+ +

2.7 Input and Output Domain “io_domain”

+

The io_domain represents the list of global input and output files created by the computational workflow.

+

Definition:

+ +

2.8 Error Domain, acceptable range of variability “error_domain”

+

The error_domain defines the empirical and algorithmic limits and error sources of the BCO.

+

Definition:

+ + + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/best_practices/index.html b/public/best_practices/index.html new file mode 100644 index 0000000..e39c3c4 --- /dev/null +++ b/public/best_practices/index.html @@ -0,0 +1,235 @@ + + + + + + + BCO Best Practice - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+
+

BioCompute Objects Best Practice

+
+
+

General

+
    +
  • The required domains are defined by the IEEE. However, a BioCompute Object is considered complete when an Error Domain exists.
  • +
  • Versioning is allowed, but only if the changes do not affect the workflow or output. BCO versioning follows a minor.patch schema, no major versions are allowed (substantial changes result in a new BCO). Minor changes are things like a change of contact information for a contributor, patch changes are things like spelling and grammar fixes.
  • +
  • In general, any step that does not transform data does not need to be included in the Description Domain as a formal step, and can be described instead in the Usability Domain. For example, arranging rows and columns in a table, or formatting a figure. Steps that transform data should comprise their own step in the Description Domain.
  • +
  • The Usability Domain should contain enough information to enable a naïve user generally skilled in bioinformatics to understand the analysis. This means that references to commonly used resources (such as basic Unix commands, well known databases like NCBI, basic terms like “alignment,” etc.) do not need to be explained, but references to less well known resources (such as obscure python packages, etc.) should be described. Description should be tailored to the intended audience, and BCOs intended for public consumption should assume a basic level of bioinformatics proficiency.
  • +
+

BioCompute Registry

+

The BioCompute Registry is a domain registry for BCO IDs in which users can register their institution or organization. Similar to a website registry, this will allow the owner of that domain to use any domain organization of their choosing, and prevent naming collisions between groups. For example, the owner of “GW” can build BCOs GW_0001.1, GW01A, GW_, or any other naming system of their preference, and these will not conflict with another registered domain, such as FDA_0001.1, etc. The BCO Registry registration numbers may not exceed five characters, and are recommended to be three characters. Any alphanumeric characters are acceptable.

+

A BCO may be registered only by the author of the object, and the domain must be approved by the domain holder. Until automated systems are in place, register a BCO by sending the BCO ID and email of the registrant to the BioCompute Team. The following institutional domains have been reserved:

+
    +
  • GWU
  • +
  • FDA
  • +
  • NIH
  • +
  • CDC
  • +
  • NCI
  • +
+

Preferred Ontologies

+

Semantic Versioning

+

BCO versioning should adhere to semantic versioning to establish how version numbers are assigned and incremented. Given a version number MAJOR.MINOR.PATCH, when versioning a BCO increment the:

+
    +
  1. MAJOR version when you make incompatible API changes,
  2. +
  3. MINOR version when you add functionality in a backwards-compatible manner, and
  4. +
  5. PATCH version when you make backwards-compatible bug fixes. +Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
  6. +
+

PAV Ontology and PROV-O

+

To preserve the provenance of each BCO, the contribution type of the reviewers and contributors is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the PROV-O. The following are possible values for the status of an object in the review process:

+
    +
  • unreviewed flag indicates that the object has been submitted, but no further evaluation or verification has occurred.
  • +
  • in-review flag indicates that verification is underway.
  • +
  • approved flag indicates that the BCO has been verified and reviewed.
  • +
  • suspended flag indicates an object that was once valid is no longer considered valid.
  • +
  • rejected flag indicates that an error or inconsistency was detected in the BCO, and it has been removed or rejected.
  • +
+

Namespace: CURIE

+

The external references field contains a list of the databases and/or ontology IDs that are cross-referenced in the BCO. The external references are used to provide more specificity in the information related to BCO entries. Cross-referenced resources need to be available in the public domain. The external references are stored in the form of prefixed identifiers (CURIEs). These CURIEs map directly to the URIs maintained by identifiers.org. See Section 3.5 for a list of the CURIEs used in this example.

+

General

+
    +
  • The required domains are defined by the IEEE. However, a BioCompute Object is considered complete when an Error Domain exists.
  • +
  • Versioning is allowed, but only if the changes do not affect the workflow or output. BCO versioning follows a minor.patch schema, no major versions are allowed (substantial changes result in a new BCO). Minor changes are things like a change of contact information for a contributor, patch changes are things like spelling and grammar fixes.
  • +
  • In general, any step that does not transform data does not need to be included in the Description Domain as a formal step, and can be described instead in the Usability Domain. For example, arranging rows and columns in a table, or formatting a figure. Steps that transform data should comprise their own step in the Description Domain.
  • +
  • The Usability Domain should contain enough information to enable a naïve user generally skilled in bioinformatics to understand the analysis. This means that references to commonly used resources (such as basic Unix commands, well known databases like NCBI, basic terms like “alignment,” etc.) do not need to be explained, but references to less well known resources (such as obscure python packages, etc.) should be described. Description should be tailored to the intended audience, and BCOs intended for public consumption should assume a basic level of bioinformatics proficiency.
  • +
+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/categories/index.html b/public/categories/index.html new file mode 100644 index 0000000..dcf8346 --- /dev/null +++ b/public/categories/index.html @@ -0,0 +1,178 @@ + + + + + + + Categories - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+

Posts about Categories

+
+
    + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/categories/index.xml b/public/categories/index.xml new file mode 100644 index 0000000..31e0154 --- /dev/null +++ b/public/categories/index.xml @@ -0,0 +1,10 @@ + + + + Categories on BioCompute Object Documentation + /categories/ + Recent content in Categories on BioCompute Object Documentation + Hugo -- gohugo.io + en-us + + diff --git a/public/css/custom.css b/public/css/custom.css new file mode 100644 index 0000000..bfd6874 --- /dev/null +++ b/public/css/custom.css @@ -0,0 +1,352 @@ +/* Global styles */ + +body { + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-feature-settings: "liga", "kern"; +} + +/* Fix jumping scrollbar when switching to long pages */ + +html { + margin-left: calc(100vw - 100%); + margin-right: 0; +} + +/* link */ + +a { + color: #0071bc; + text-decoration: none; + background-color: transparent; +} + +a:hover, a:focus, a:active { + color: #8cc53f; + text-decoration: none; +} + +/* landing content */ + +.landing { + font-size: 1.125rem; + line-height: 1.875rem; +} + +.landing p { + margin-bottom: 1.5rem; +} + +/* horizontal rule */ + +hr { + border: 0; + border-top: 1px solid #e1e9f4; +} + +.footer hr { + margin: 1.3rem 1.52rem 1.3rem 1.52rem; +} + +/* Partial: Colors */ + +.text-patreon { + color: #F96854 !important; +} + +.text-ko-fi { + color: #DF5000 !important; +} + +.text-bmc { + color: #FF813F !important; +} + +/* Partial: Header */ + +.header a:hover { + text-decoration: none; +} + +.header ul.nav li { + font-size: 1rem; +} + +.header ul.nav li a.text-events { + color: #8cc53f; +} + +.header ul.nav li a.text-events:hover { + color: #0071bc; +} + +/* Partial: Content */ + +.content .page-heading { + font-size: 1.5rem; + font-weight: 700; + letter-spacing: -0.005rem; + text-transform: "capitalize"; + 
-moz-osx-font-smoothing: grayscale; + -webkit-font-smoothing: antialiased; + color: #333333; + margin-bottom: 16px; +} + +.content h1, h2, h3, h4, h5, h6 { + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-weight: 700; + color: #333333; + margin-bottom: 1rem; +} + +.content .meta { + font-weight: 400; + font-size: 1.125rem; + color: #738491; + margin-bottom: 10px; +} + +.content .meta a { + text-decoration: none; +} + +.content .middot:before { + margin: 0 3px; + content: "·"; +} + +.content .caption { + text-align: center; + margin-top: 10px; + color: #738491; + font-size: 0.9rem; +} + +.content .markdown { + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-weight: 400; + font-size: 1.125rem; + color: #333333; + line-height: 1.75rem; +} + +.content .markdown li { + margin-top: 1rem; + margin-bottom: 1rem; +} + +.content twitterwidget { + margin: auto; +} + +.content .meta, .content .markdown h1, .content .markdown h2, .content .markdown h3, .content .markdown h4, .content .markdown h5, .content .markdown h6, .content .markdown p, .content .markdown ul, .content .markdown ol, .content .markdown dl, .content .markdown blockquote, .gist-file { + margin-left: 1.5rem; + margin-right: 1.5rem; +} + +div.alert { + margin-left: 1.5rem; + margin-right: 1.5rem; +} + +div.alert p { + margin-bottom: 0; +} + +/* blog post typography */ + +.content .blog-post-title { + font-size: 2.5rem; + line-height: 3rem; +} + +.content .blog-post-content { + font-size: 1.3125rem; + line-height: 2rem; +} + +/* centered, boxed blog post images */ + +.content .figure img { + display: block; + margin-left: auto; + margin-right: auto; + padding-left: 1.5rem; + padding-right: 1.5rem; + max-width: 100%; +} + +.content 
.markdown code, .content .markdown pre { + font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + background-color: #fff; +} + +.content .markdown code { + /* enclosed by single backtick (`) */ + color: #333; + padding: .2em .4em; + margin: 0; + font-size: 1rem; + background-color: rgba(27, 31, 35, .05); + border-radius: 6px; +} + +.content .markdown pre { + /* Hugo specific: consider using the 'highlight' shortcode */ + display: block; + margin-top: 1rem; + margin-bottom: 2rem; + padding: 1rem; + line-height: 20px; + white-space: pre; + word-break: break-all; + word-wrap: break-word; + margin-left: 1.5rem; + margin-right: 1.5rem; + border: 1px solid #ddd; + border-radius: 6px; +} + +.content .markdown pre code, .content .markdown pre output { + /* enclosed by 4 backticks (````) */ + padding: 0; + font-size: 0.75rem; + line-height: 20px; + background-color: #fff; + border-radius: 0; +} + +.content .markdown blockquote { + padding: 0.5rem 0.5rem; + margin-top: 1rem; + margin-bottom: 1rem; + color: #7a7a7a; + border-left: 0.25rem solid #595959; +} + +.content .markdown blockquote p:last-child { + margin-bottom: 0; +} + +.content .markdown figure { + background: #fff; +} + +.content .groupby { + list-style: none; + color: #333333; + font-size: 2rem; + font-weight: 700; + margin-top: 10px; + margin-bottom: 8px; +} + +.content .post-item { + display: -webkit-flex; + display: -moz-flex; + display: -ms-flexbox; + display: -ms-flex; + display: flex; + margin-bottom: 1rem; +} + +.content .post-item .meta-date { + font-size: 1.125rem; + color: #738491; + line-height: 2rem; + display: block; + min-width: 10rem; +} + +.content .post-item .meta-title { + font-size: 1.125rem; + line-height: 2rem; + margin-left: -3rem; +} + +.content .navigation .icon { + width: 16px; + height: 16px; +} + +/* Partial: Utterances Comments */ + +.comments { + padding-left: 1.5rem; + padding-right: 1.5rem; + max-width: 100%; +} + +/* Partial: Footer */ + 
+.footer { + font-weight: 400; + font-size: 1.125rem; +} + +.footer a { + color: #333333; +} + +.footer a:hover, .footer a:focus { + color: #0071bc; +} + +.footer a { + margin-left: 6px; + margin-right: 6px; +} + +.footer a:first-child { + margin-left: 0; +} + +.footer a:last-child { + margin-right: 0; +} + +.site-copyright { + min-width: 16em; +} + +.site-social { + text-align: right; +} + +.site-social ul { + list-style-type: none; + padding: 0; +} + +.site-social li { + display: inline-block; +} + +.site-social li:not(:last-child)::after { + content: '|'; + color: #e1e9f4; + margin: 0 0.3rem 0 0.5rem; +} + +/* < medium screens */ + +@media (max-width: 767.98px) { + html { + font-size: 0.9rem; + } + .content .meta, .content .markdown h1, .content .markdown h2, .content .markdown h3, .content .markdown h4, .content .markdown h5, .content .markdown h6, .content .markdown p, .content .markdown ul, .content .markdown ol, .content .markdown dl, .content .markdown blockquote { + margin-left: 0; + margin-right: 0; + } +} + +/* < large screens */ + +@media (max-width: 991.98px) { + .footer hr { + margin: 1.3rem 0rem 1.3rem 0rem; + } + .site-social { + text-align: left; + } +} \ No newline at end of file diff --git a/public/description-domain/index.html b/public/description-domain/index.html new file mode 100644 index 0000000..c231c6c --- /dev/null +++ b/public/description-domain/index.html @@ -0,0 +1,337 @@ + + + + + + + Description Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.4 Description Domain “description_domain”

+

This section defines the fields of the description_domain part of the BCO structure.

+

Structured field for description of external references, the pipeline steps, and the relationship of I/O objects. Information in this domain is not used for computation. This domain is meant to capture information that is currently being provided in FDA submission in journal format. It is possible that in the future this field can be semi-automatically generated from the execution_domain information.

+

Condensed example:

+
    "description_domain": {
+        "keywords": [
+        ], 
+        "xref": [
+        ],
+        "platform": ["HIVE"],
+        "pipeline_steps": [
+            {
+                "step_number": 1, 
+                "name": "HIVE-hexagon", 
+                "description": "Alignment of reads to a set of references", 
+                "version": "1.3", 
+                "prerequisite": [
+                    {
+                        "name": "Hepatitis C virus genotype 1", 
+                        "uri": {
+                            "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792",
+                            "access_time": "2017-01-24T09:40:17-0500"
+                        }
+                    }
+                ], 
+                "input_list": [
+                ],
+                "output_list": [
+                ]
+            }
+        ]
+    }
+

2.4.1 Keywords “keywords”

+

This is a list of keywords to aid in search-ability and description of the experiment. This is required.

+
    "keywords": [
+        "HCV1a", 
+        "Ledipasvir", 
+        "antiviral resistance", 
+        "SNP", 
+        "amino acid substitutions"
+    ]
+

2.4.2 External References “xref”

+

This field contains a list of the databases and/or ontology IDs that are cross-referenced in the BCO. The external references are used to provide more specificity in the information related to BCO entries. Cross-referenced resources need to be available in the public domain. The external references are stored in the form of prefixed identifiers (CURIEs). These CURIEs map directly to the URIs maintained by identifiers.org. See Appendix-II for a list of the CURIEs used in this example.

+
        "xref": [
+            {
+                "namespace": "pubchem.compound",
+                "name": "PubChem-compound",
+                "ids": ["67505836"], 
+                "access_time": "2018-13-02T10:15-05:00"
+            },
+
+            {
+                "namespace": "pubmed",
+                "name": "PubMed",
+                "ids": ["26508693"], 
+                "access_time": "2018-13-02T10:15-05:00"
+            },
+
+            {
+                "namespace": "so",
+                "name": "Sequence Ontology",
+                "ids": ["SO:0000002", "SO:0000694", "SO:0000667", "SO:0000045"],
+                "access_time": "2018-13-02T10:15-05:00"
+            },
+
+            {
+                "namespace": "taxonomy",
+                "name": "Taxonomy",
+                "ids": ["31646"], 
+                "access_time": "2018-13-02T10:15-05:00"
+            }
+        ] 
+

2.4.3 Platform/Environment “platform”

+

The multi-value reference to a particular deployment of an existing platform where this BCO can be reproduced. A platform can be a bioinformatic platform such as Galaxy or HIVE or it can be a software package such as CASAVA or apps that includes multiple algorithms and software. This is for informative purposes only.

+
    "platform": ["HIVE"]
+

2.4.4 Pipeline tools “pipeline_steps”

+

This is a structured domain for recording the specifics of a pipeline. Each individual tool (or a well-defined and reusable script) is represented as a step, at the discretion of the author. Parallel processes are given the same step number. This is required.

+

2.4.4.1 Step Number “step_number”

+

This is a non-negative integer value representing the position of the tool in a one-dimensional representation of the pipeline. The number is a suggestion for a partial order for presentation purposes, e.g. parallel computations assigned the same number based on their first possible execution. Actual execution order might differ from the step number. Gaps are allowed (e.g. step 20 follows step 10).

+
    "step_number": 1
+

2.4.4.2 Name “name”

+

Name for the specific tool. This field is a string (A-z, 0-9) and should be a single uniquely identifying word for the tool.

+
    "name": "HIVE-hexagon"
+

2.4.4.3 Tool Description “description”

+

A free text field for describing the specific use/purpose of the tool.

+
    "description": "Alignment of reads to a set of references",
+

2.4.4.4 Tool Version “version”

+

The version assigned to the instance of the tool used corresponding to the upstream release.

+
    "version": "1.3",
+

2.4.4.5 Tool Prerequisites “prerequisite”

+

A list of text values to indicate any packages or prerequisites for running the tool used. This consists of a name and uri. The uri object consists of the filename, uri, access_time, and sha1_checksum properties. The uri is the only REQUIRED property, but it is recommended that the access_time also be provided for the prerequisites listed here.

+
    "prerequisite": [
+        {
+            "name": "Hepatitis C virus genotype 1", 
+            "uri": {
+                "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792",
+                "access_time": "2017-01-24T09:40:17-0500"
+            }
+        }, 
+        {
+            "name": "Hepatitis C virus type 1b complete genome", 
+            "uri": {
+                "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376",
+                "access_time": "2017-01-24T09:40:17-0500"
+            }
+        }, 
+        {
+            "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", 
+            "uri": {
+                "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261",
+                "access_time": "2017-01-24T09:40:17-0500"
+            }
+        }, 
+        {
+            "name": "Hepatitis C virus clone J8CF, complete genome", 
+            "uri": {
+                "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758",
+                "access_time": "2017-01-24T09:40:17-0500"
+            }
+        }, 
+        {
+            "name": "Hepatitis C virus S52 polyprotein gene", 
+            "uri": {
+                "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559",
+                "access_time": "2017-01-24T09:40:17-0500"
+            }
+        }
+    ]
+

2.4.4.6 Input List “input_list”

+

Each tool lists the URIs (expressed as a URN or URL) of the input files. These are a catchall for read files, reference files or any other type of input. All of these fields are optional and for descriptive purposes, therefore the structure here is less rigid than in other fields.

+
    "input_list": [
+        {
+            "uri": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=objFile&ids=514683",
+            "access_time": "2017-01-24T09:40:17-0500"
+        }, 
+        {
+            "uri": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=objFile&ids=514682",
+            "access_time": "2017-01-24T09:40:17-0500"
+        }
+    ],
+

2.4.4.7 Output List “output_list”

+

Each tool lists the URIs (expressed as a URN or URL) of the output files for that tool.

+
    "output_list": [
+        {
+            "uri": "https://hive.biochemistry.gwu.edu/data/514769/allCount-aligned.csv",
+            "access_time": "2017-01-24T09:40:17-0500"
+        }
+    ]
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/error-domain/index.html b/public/error-domain/index.html new file mode 100644 index 0000000..81f17b3 --- /dev/null +++ b/public/error-domain/index.html @@ -0,0 +1,206 @@ + + + + + + + Error Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.8 Error Domain, acceptable range of variability “error_domain”

+

The error domain can be used to determine what range of input returns outputs that are within the tolerance level defined in this subdomain and therefore can be used to optimize the algorithm. It consists of two subdomains: empirical and algorithmic.

+

The empirical error subdomain contains empirically determined values such as limits of detectability, false positives, false negatives, statistical confidence of outcomes, etc. This can be measured by running the algorithm on multiple data samples of the usability domain or through the use of carefully designed in-silico data. For example, a set of spiked, well-characterized samples can be run through the algorithm to determine the false positives, negatives, and limits of detection.

+

The algorithmic subdomain is descriptive of errors that originate from fuzziness of the algorithms, driven by stochastic processes, in dynamically parallelized multi-threaded executions, or in machine learning methodologies where the state of the machine can affect the outcome. This can be measured by taking a random subset of the data and re-running the analysis, or using some rigorous mathematical modeling of the accumulated errors and providing confidence values. For example, bootstrapping is frequently used with stochastic simulation based algorithms to accumulate sets of outcomes and estimate statistically significant variability for the results.

+

For data integration BCOs used to develop knowledgebases, the error domain can, for example, contain rules that determine inclusion in the knowledgebase and reference to data that pass and fail the set of rules.

+

The possible keys within each subdomain are workflow-specific free text, which should be human-readable.

+
    "error_domain": {
+        "empirical_error": {
+            "false_negative_alignment_hits": "<0.0010", 
+            "false_discovery": "<0.05"
+        }, 
+
+        "algorithmic_error": { 
+            "false_positive_mutation_calls": "<0.00005", 
+            "false_discovery": "0.005"
+        }
+    }
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/HCV1a.json b/public/examples/HCV1a.json similarity index 63% rename from examples/HCV1a.json rename to public/examples/HCV1a.json index b02994a..885d20e 100644 --- a/examples/HCV1a.json +++ b/public/examples/HCV1a.json @@ -1,9 +1,9 @@ { - "object_id": "https://w3id.org/biocompute/1.3.0/examples/HCV1a.json", - "checksum": "8e03f6f65e2caae86770a64a9ebef3e0651d4675a96687e5b148fda181c9cdc0", - "spec_version" : "https://w3id.org/biocompute/1.3.0/", + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HCV1a.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/", "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", + "name": "HCV1a ledipasvir resistance SNP detection", "version": "2.9", "review": [ { @@ -11,10 +11,12 @@ "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", "date": "2017-11-12T12:30:48-0400", "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", + "name": "Charles Hadley King", + "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", - "contribution": ["curatedBy"], + "contribution": [ + "curatedBy" + ], "orcid": "https://orcid.org/0000-0003-1409-4549" } }, @@ -23,163 +25,189 @@ "reviewer_comment": "The revised BCO looks fine", "date": "2017-12-12T12:30:48-0400", "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", + "name": "Eric Donaldson", + "affiliation": "FDA", "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": ["curatedBy"] + "contribution": [ + "curatedBy" + ] } } ], - "obsolete_after" : "2118-09-26T14:43:43-0400", - "embargo" : { + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { "start_time": "2000-09-26T14:43:43-0400", "end_time": "2000-09-26T14:43:45-0400" }, - "created": "2017-01-24T09:40:17-0500", - "modified": "2018-09-21T14:06:14-0400", + "created": "2017-01-24T09:40:17-0500", + "modified": "2018-09-21T14:06:14-0400", "contributors": [ { - "name": "Charles Hadley King", - "affiliation": "George Washington University", + "name": "Charles Hadley King", + "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", - "contribution": ["createdBy", "curatedBy"], + "contribution": [ + "createdBy", + "curatedBy" + ], "orcid": "https://orcid.org/0000-0003-1409-4549" }, { - "name": "Eric Donaldson", - "affiliation": "FDA", + "name": "Eric Donaldson", + "affiliation": "FDA", "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": ["authoredBy"] + "contribution": [ + "authoredBy" + ] } ], "license": "https://spdx.org/licenses/CC-BY-4.0.html" }, "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] 
antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" ], - "extension_domain":{ - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ], - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - }, + "extension_domain": [ + { + "extension_schema": 
"https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], "description_domain": { "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", "amino acid substitutions" - ], + ], "xref": [ { "namespace": "pubchem.compound", "name": "PubChem-compound", - "ids": ["67505836"], + "ids": [ + "67505836" + ], "access_time": "2018-13-02T10:15-05:00" }, { "namespace": "pubmed", "name": "PubMed", - "ids": ["26508693"], + "ids": [ + "26508693" + ], "access_time": "2018-13-02T10:15-05:00" }, { "namespace": "so", "name": "Sequence Ontology", - "ids": ["SO:000002", "SO:0000694", "SO:0000667", "SO:0000045"], + "ids": [ + "SO:0000002", + "SO:0000694", + "SO:0000667", + "SO:0000045" + ], "access_time": "2018-13-02T10:15-05:00" }, { "namespace": "taxonomy", "name": "Taxonomy", - "ids": ["31646"], + "ids": [ + "31646" + ], "access_time": "2018-13-02T10:15-05:00" } ], - "platform": ["HIVE"], + 
"platform": [ + "HIVE" + ], "pipeline_steps": [ { - "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", "prerequisite": [ { - "name": "Hepatitis C virus genotype 1", + "name": "Hepatitis C virus genotype 1", "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { - "name": "Hepatitis C virus type 1b complete genome", + "name": "Hepatitis C virus type 1b complete genome", "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { - "name": "Hepatitis C virus clone J8CF, complete genome", + "name": "Hepatitis C virus clone J8CF, complete genome", "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { - "name": "Hepatitis C virus S52 polyprotein gene", + "name": "Hepatitis C virus S52 polyprotein gene", "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", "access_time": "2017-01-24T09:40:17-0500" } } - ], + ], "input_list": [ { "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", "access_time": "2017-01-24T09:40:17-0500" - }, + }, { "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", "access_time": "2017-01-24T09:40:17-0500" @@ -193,21 +221,21 @@ ] }, { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", "input_list": [ { "uri": 
"http://example.com/data/514769/dnaAccessionBased.csv", "access_time": "2017-01-24T09:40:17-0500" } - ], + ], "output_list": [ { "uri": "http://example.com/data/514801/SNPProfile.csv", "access_time": "2017-01-24T09:40:17-0500" - }, + }, { "uri": "http://example.com/data/14769/allCount-aligned.csv", "access_time": "2017-01-24T09:40:17-0500" @@ -217,26 +245,26 @@ ] }, "execution_domain": { - "script":[ + "script": [ { "uri": { "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" } } ], - "script_driver": "shell", + "script_driver": "shell", "software_prerequisites": [ { - "name": "HIVE-hexagon", + "name": "HIVE-hexagon", "version": "babajanian.1", "uri": { "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", "access_time": "2017-01-24T09:40:17-0500", "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" } - }, + }, { - "name": "HIVE-heptagon", + "name": "HIVE-heptagon", "version": "albinoni.2", "uri": { "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", @@ -246,88 +274,107 @@ ], "external_data_endpoints": [ { - "name": "HIVE", + "name": "HIVE", "url": "http://example.com/dna.cgi?cmd=login" - }, + }, { - "name": "access to e-utils", + "name": "access to e-utils", "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" } - ], + ], "environment_variables": { "HOSTTYPE": "x86_64-linux", "EDITOR": "vim" } - }, + }, "parametric_domain": [ - {"param": "seed", "value": "14", "step": "1"}, - {"param":"minimum_match_len", "value": "66", "step": "1"}, - {"param": "divergence_threshold_percent", "value": "0.30", "step": "1"}, - {"param": "minimum_coverage", "value": "15", "step": "2"}, - {"param": "freq_cutoff", "value": "0.10", "step": "2"} - ], + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + 
}, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", + "filename": "Hepatitis C virus genotype 1", "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { "uri": { - "filename": "Hepatitis C virus type 1b complete genome", + "filename": "Hepatitis C virus type 1b complete genome", "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { "uri": { "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", + "filename": "Hepatitis C virus S52 polyprotein gene", "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", "access_time": "2017-01-24T09:40:17-0500" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", + "filename": "HCV1a_drug_resistant_sample0001-01", "uri": "http://example.com/nuc-read/514682", "access_time": "2017-01-24T09:40:17-0500" } - }, + }, { - "uri": { "filename": "HCV1a_drug_resistant_sample0001-02", "uri": "http://example.com/nuc-read/514683", "access_time": "2017-01-24T09:40:17-0500" } } - ], + ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { + "mediatype": "text/csv", + "uri": { "uri": "http://example.com/data/514769/dnaAccessionBased.csv", "access_time": "2017-01-24T09:40:17-0500" } }, { - "mediatype": "text/csv", + "mediatype": "text/csv", "uri": { "uri": "http://example.com/data/514801/SNPProfile*.csv", "access_time": "2017-01-24T09:40:17-0500" @@ -337,12 +384,12 @@ }, "error_domain": { "empirical_error": { - 
"false_negative_alignment_hits": "<0.0010", + "false_negative_alignment_hits": "<0.0010", "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", "false_discovery": "0.005" } } -} +} \ No newline at end of file diff --git a/public/examples/HIVE_metagenomics.json b/public/examples/HIVE_metagenomics.json new file mode 100644 index 0000000..ef13b4e --- /dev/null +++ b/public/examples/HIVE_metagenomics.json @@ -0,0 +1,273 @@ +{ + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HIVE_metagenomics.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": ["curatedBy"], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after" : "2118-09-26T14:43:43-0400", + "embargo" : { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": ["createdBy", "curatedBy", "authoredBy"], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": ["createdBy", "curatedBy", "authoredBy"], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain":[ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": ["0001988"], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": ["9606"], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": ["hive"], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } 
+ ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value" : "x86_64-linux" + } + }, + "parametric_domain": [ + {"param": "seed", "value": "14", "step": "2"}, + {"param":"minimum_match_len", "value": "66", "step": "2"}, + {"param": "divergence_threshold_percent", "value": "0.30", "step": "2"}, + {"param": "minimum_coverage", "value": 
"15", "step": "2"}, + {"param": "freq_cutoff", "value": "0.10", "step": "2"} + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + } +} diff --git 
a/public/examples/LICENSE b/public/examples/LICENSE new file mode 100644 index 0000000..d4c4219 --- /dev/null +++ b/public/examples/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2018, BioCompute +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/public/examples/UVP.json b/public/examples/UVP.json new file mode 100644 index 0000000..be6983a --- /dev/null +++ b/public/examples/UVP.json @@ -0,0 +1,1149 @@ +{ + "object_id": "https://w3id.org/biocompute/1.3.0/examples/UVP_BCO.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "spec_version": "https://w3id.org/biocompute/1.3.0/", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. 
tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "v1.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2018-10-08T18:02:33-0400" + }, + "created": "2017-11-12T12:30:48-0400", + "modified": "2018-10-08T18:35:33-0400", + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": 
"hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "xref": [ + { + "namespace": "pubmed", + "name": "PubMed", + "ids": [ + "00000" + ], + "access_time": "2018-13-02T10:15-05:00" + }, + { + "namespace": "so", + "name": "Sequence Ontology", + "ids": [ + "0000694" + ], + "access_time": "2018-13-02T10:15-05:00" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "1773" + ], + "access_time": "2018-13-02T10:15-05:00" + } + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + 
"step_number": 7, + "name": "IndelRealigner", + "description": "Performs re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions
in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + } +} diff --git a/public/examples/glycosylation-sites-UniCarbKB.json b/public/examples/glycosylation-sites-UniCarbKB.json new file mode 100644 index 0000000..be49e02 --- /dev/null +++ b/public/examples/glycosylation-sites-UniCarbKB.json @@ -0,0 +1,266 @@ +{ + "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2glycosylation-sites-UniCarbKB", + "etag": "5741d66ddf7881db33f7075ce8b64b941bd7cc001965f31682e5da9966c7f3ba", + "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/", + "provenance_domain":{ + "name": "glycosylation-sites-UniCarbKB", + "version": "1.0", + "review":[ + { + "status": "approved", + "reviewer_comment": "The dataset has passed the manual and automated QC steps and the readme has also been reviewed", + "reviewer":{ + "name": "Rahi Navelkar", + "affiliation": "The George Washington University", + "email": "rsn13@gwu.edu", + "contribution":["curatedBy"] + } + } + ], + "created": "2018-02-21T14:46:55-5:00", + "modified": "2018-10-10T11:34:02-5:00", + "contributors":[ + { + "name": "Matthew Campbell", + "affiliation": "Institute for Glycomics, Griffith University, Gold Coast, Queensland, Australia", + "email": "m.campbell2@griffith.edu.au", + "contribution":["contributedBy"] + }, + { + "name": "Rahi Navelkar", + "affiliation": "The George Washington University", + "email": "rsn13@gwu.edu", + "contribution":["curatedBy"] + }, + { + "name": "Robel Kahsay", + "affiliation": "The George Washington University", + "email": "hadley_king@gwu.edu", + "contribution":["createdBy"] + } + ], + "license": "https://creativecommons.org/licenses/by/4.0/" + }, + 
"usability_domain":[ + "List of human [taxid:9606] proteins with information on glycosylation sites from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128]" + ], + "extension_domain":[ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/license/license_extension.json", + "license_extension":{ + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "scripts_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + } + }, + { + + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension":{ + "scm_repository": "https://github.com/GW-HIVE/glygen-backend-integration/", + "scm_type": "git", + "scm_commit": "d34b85553e775dd5452005d786fe6e47d6048ee0", + "scm_path": "/data/projects/glygen/generated/datasets/reviewed/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.readme.txt" + } + } + + ], + "description_domain":{ + "keywords":[ + "protein", + "canonical", + "glycosylation", + "glycan" + ], + "xref":[ + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": ["9606"], + "access_time": "2018-21-02T14:46:55-5:00" + } + ], + "platform": ["centos7"], + "pipeline_steps":[ + { + "step_number":1, + "name": "ac2canonical.py", + "description": "Python script for mapping the UniProtKB accessions in the input file to the UniProtKB canonical accessions ", + "version": "", + "input_list":[ + { + "uri": "/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" + } + ], + "output_list":[ + { + "uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" + } + ] + }, + { + "step_number":2, + "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py", + "description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ", + + "input_list":[ + {"uri": 
"human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"} + ], + "output_list":[ + {"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"} + ] + }, + { + "step_number":2, + "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py", + "description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ", + "input_list":[ + {"uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"} + ], + "output_list":[ + {"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"} + ] + }, + { + "step_number":3, + "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py", + "description": "Python script for quality check of the processed file. Records which fall under one or more following criteria's are flagged and eliminated and can be accessed using the log file. The elimination steps include - a. If the protein accession is not included in UniProtKB protein list - UniProtKB Nov-2017 Release b. If the amino acid position does not match to the amino acid on the associated position on fasta sequence - UniProtKB Nov-2017 Release c. If the id (UnicarbKB structure id) is not present in input file d. If the glycosylation type (linkage type) is not retrieved through step 3 e. If a serine or threonine is reported for an N-linked glycan structure f. 
If an asparagine is reported for an O-linked glycan structure", + "input_list":[ + {"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"}, + {"uri": "human_protein_all.fasta"} + ], + "output_list":[ + {"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"}, + {"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log"} + ] + } + ] + }, + "execution_domain":{ + "script":[ + { + "uri": { + "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/ac2canonical.py" + } + }, + { + "uri": { + "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2a.py" + } + }, + { + "uri": { + "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py" + } + }, + { + "uri": { + "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py" + } + } + ], + "script_driver": "manual", + "software_prerequisites":[ + { + "name": "Python", + "version": "2.7.13", + "uri": { + "uri": "https://www.python.org/downloads/release/python-2713/", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "17add4bf0ad0ec2f08e0cae6d205c700" + } + } + ], + "external_data_endpoints": [ + { + "name": "UniCarbKB", + "url": "http://www.unicarbkb.org/" + }, + { + "name": "access glygen-backend-integration", + "url": "https://github.com/glygener/glygen-backend-integration" + } + ], + "environment_variables":{ + + } + }, + "io_domain":{ + "input_subdomain":[ + { + "uri":{ + "filename": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt", + "uri": "http://data.glygen.org/datasets/source/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt", + "access_time": "2018-10-10T11:34:02-5:00" + } + }, + { + "uri":{ 
+ "filename": "human_protein_all.fasta", + "uri": "http://data.glygen.org/GLYDS00053", + "access_time": "2018-10-10T11:34:02-5:00" + } + } + ], + "output_subdomain":[ + { + "mediatype": "csv/text", + "uri":{ + "filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log", + "uri": "http://data.glygen.org/datasets/logs/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log", + "access_time": "2018-10-10T11:37:02-5:00" + } + }, + { + "mediatype": "csv/text", + "uri":{ + "filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv", + "uri": "http://data.glygen.org/GLYDS00040", + "access_time": "2018-10-10T11:37:02-5:00" + } + } + ] + }, + "error_domain":{ + "empirical_error":{ + "comment": "Unique value statistics for the dataset", + "statistics":[ + { + "key": "uniprotkb_canonical_ac", + "value":92, + "description": "Accession assigned to the protein isoform chosen to be the canonical sequence in UniProtKB database" + }, + { + "key": "glycosylation_site", + "value":223, + "description": "Site on the protein sequence where glycosylation is observed" + }, + { + "key": "evidence", + "value":163, + "description": "NCBI PubMed Id (PMID) as evidence for the entry" + }, + { + "key": "unicarbkb_id", + "value":984, + "description": "UnicarbKB data structure identifier" + }, + { + "key": "glytoucan_ac", + "value":824, + "description": "Unique accession assigned to the registered glycan structure in GlyTouCan database" + }, + { + "key": "amino_acid", + "value":3, + "description": "Three letter code abbreviation of the amino acid" + }, + { + "key": "glycosylation_type", + "value":3, + "description": "Type of glycosylation linkage type" + } + ] + }, + "algorithmic_error":{} + } +} diff --git a/public/examples/index.html b/public/examples/index.html new file mode 100644 index 0000000..dd08008 --- /dev/null +++ b/public/examples/index.html @@ -0,0 +1,188 @@ + + + + + + + Examples - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + + +
+ +
+

Sections

+
    + +
  • + +
  • + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/examples/index.xml b/public/examples/index.xml new file mode 100644 index 0000000..db08176 --- /dev/null +++ b/public/examples/index.xml @@ -0,0 +1,22 @@ + + + + Examples on BioCompute Object Documentation + /examples/ + Recent content in Examples on BioCompute Object Documentation + Hugo -- gohugo.io + en-us + + + /examples/readme/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /examples/readme/ + BCO Examples A repository for BCO example flat files. +Table of Contents: HCV1a - This BCO was developed with the Reproducibility and Interpretation use case in mind. This is the archetypal BCO example and is in the BCO Specification repository. + glycosylation-sites-UniCarbKB - This BCO was developed with the Data integration use case in mind. The full repository is available here + UVP - This BCO was developed with the Accountability use case in mind. + + + + diff --git a/public/examples/readme/index.html b/public/examples/readme/index.html new file mode 100644 index 0000000..b128e59 --- /dev/null +++ b/public/examples/readme/index.html @@ -0,0 +1,194 @@ + + + + + + + - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ +

BCO Examples

+

A repository for BCO example flat files.

+

Table of Contents:

+ + + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/execution-domain/index.html b/public/execution-domain/index.html new file mode 100644 index 0000000..e80a631 --- /dev/null +++ b/public/execution-domain/index.html @@ -0,0 +1,260 @@ + + + + + + + Execution Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.5 Execution Domain “execution_domain”

+

This section defines the execution_domain part of the BCO.

+

The fields required for execution of the BCO have been encapsulated together in order to clearly separate information needed for deployment, software configuration and running applications in a dependent environment. One byproduct of an accurate BCO definition is facilitation of reproducibility as defined by the Oxford English Dictionary as “the extent to which consistent results are obtained when produced repeatedly.”

+

Condensed example:

+
"execution_domain": {
+        "script_access_type": "text",
+        "script": [
+        ],
+        "script_driver": "manual",
+        "software_prerequisites": [
+        ],
+        "external_data_endpoints": [
+        ], 
+        "environment_variables": {
+        }
+}
+

2.5.1 Script “script”

+

The Script field points to internal or external references to a script object that was used to perform computations for this BCO instance. This may be a reference to an object in GitHub, a computational service or any other type of script.

+
 "script": ["https://example.com/workflows/antiviral_resistance_detection_hive.py"]
+

2.5.2 Script driver “script_driver”

+

This field provides a space to indicate what kind of executable can be launched in order to perform a sequence of commands described in the script (see above) in order to run the pipeline.

+
"script_driver": "shell"
+

2.5.3 Algorithmic tools and Software Prerequisites “software_prerequisites”

+

An optional multi-value field listing the minimal necessary prerequisites, libraries, and tool versions needed to successfully run the script to produce the BCO. The keys are name, version, and uri.

+
        "software_prerequisites": [
+            {
+                "name": "HIVE-hexagon", 
+                "version": "babajanian.1",
+                "uri": {
+                    "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-",
+                    "access_time": "2017-01-24T09:40:17-0500",
+                    "sha1_chksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c"
+                }
+            }, 
+            {
+                "name": "HIVE-heptagon", 
+                "version": "albinoni.2",
+                "uri": {
+                    "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-",
+                    "access_time": "2017-01-24T09:40:17-0500"
+                }
+            }
+        ]
+

2.5.4 External Data Endpoints “external_data_endpoints”

+

An optional multi-value field listing the minimal necessary domain specific external data source access in order to successfully run the script to produce BCO. The values under this field present the requirements for network protocol endpoints used by a pipeline’s scripts, or other software.

+

The key url defines an endpoint to be accessed. If the path of the URL is / then any resource at the given domain may be accessed, while if the path is more specific then only resources whose path prefix matches may be accessed.

+

The key name should describe the service that is accessed.

+
"external_data_endpoints": [
+
+    {"url": "protocol://domain:port/application/path","name": "generic name"},
+
+    {"url": "ftp://data.example.com:21/",
+    "name": "access to ftp server"},
+
+    {"url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils",
+    "name": "access to e-utils web service"}
+]
+

2.5.5 Environment Variables “environment_variables”

+

This is an array of key-value pairs useful to configure the execution environment on the target platform. For example, one might specify the number of compute cores, or the amount of memory available to the script. The possible keys are specific to each platform. The “value” should be a JSON string. +The regex is based on the following:

+
+

http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html +Environment variable names used by the utilities in the Shell and Utilities volume of IEEE Std 1003.1-2001 consist solely of uppercase letters, digits, and the ‘_’ (underscore) from the characters defined in Portable Character Set and do not begin with a digit. Other characters may be permitted by an implementation; applications shall tolerate the presence of such names. Uppercase and lowercase letters shall retain their unique identities and shall not be folded together. The name space of environment variable names containing lowercase letters is reserved for applications. Applications can define any environment variables with names from this name space without modifying the behavior of the standard utilities. +Note: +Other applications may have difficulty dealing with environment variable names that start with a digit. For this reason, use of such names is not recommended anywhere.

+
+
        "environment_variables": {
+            "HOSTTYPE": "x86_64-linux",
+            "EDITOR": "vim"
+        }
+    }
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/extension-fhir/index.html b/public/extension-fhir/index.html new file mode 100644 index 0000000..32d0ca1 --- /dev/null +++ b/public/extension-fhir/index.html @@ -0,0 +1,224 @@ + + + + + + + BCO Introduction - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.3.1 Extension to External References: SMART on FHIR Genomics

+

The external references example extension to FHIR resource demonstrates how specific data elements can be extracted from EHR systems or other secure FHIR endpoints via technologies such as SMART on FHIR Genomics (https://www.ncbi.nlm.nih.gov/pubmed/26198304) without compromising patient and providers’ information. This is because the portions being transferred contain no identifiable information about the patient. Instead there is a reference to the actual resource instance (via FHIR URL) through which all data is accessed.

+

The fhir_extension is defined as an array of endpoints from which to fetch resources.

+

fhir_endpoint is a string containing the URL of the endpoint of the FHIR server containing the resource. fhir_version must be present showing the FHIR version used.

+

fhir_resources is an array of resources to fetch from the endpoint, where fhir_resource is a string containing the type of resource used according to the specified version. (A full list of permitted FHIR 3 resources is available at http://hl7.org/fhir/STU3/resourcelist.html.) fhir_id is a string containing the server-specific identifier for the resource instance.

+

The link to FHIR can also be added to the usability domain. More on FHIR Genomics in release 3 of FHIR can be found here: https://www.hl7.org/fhir/genomics.html

+

SMART on FHIR Genomics provides a framework for EHR-based apps built on FHIR that integrate clinical and genomic information. For more information on how to use the SMART on FHIR Genomics apps, please visit http://projects.iq.harvard.edu/smartgenomics/.

+
    "extension_domain":{
+        "fhir_extension": [
+            {
+                "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3",
+                "fhir_version": "3",
+                "fhir_resources": [
+                    {
+                        "fhir_resource": "Sequence",
+                        "fhir_id": "21376"
+                    },
+                    {
+                        "fhir_resource": "DiagnosticReport",
+                        "fhir_id": "6288583"
+                    },
+                    {
+                        "fhir_resource": "ProcedureRequest",
+                        "fhir_id": "25544"
+                    },
+                    {
+                        "fhir_resource": "Observation",
+                        "fhir_id": "92440"
+                    },
+                    {
+                        "fhir_resource": "FamilyMemberHistory",
+                        "fhir_id": "4588936"
+                    }
+                ]
+            }
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/extension-scm/index.html b/public/extension-scm/index.html new file mode 100644 index 0000000..2aec0f5 --- /dev/null +++ b/public/extension-scm/index.html @@ -0,0 +1,210 @@ + + + + + + + Extension to External References: Software Configuration Management (SCM) - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.3.2 Extension to External References: Software Configuration Management (SCM)

+

The external references example extension to a SCM repository demonstrates how a BioCompute Object software source code can be stored/deposited/downloaded. The BCO would contain links to the SCM repository where the information is stored and easily retrieved. The links to the SCM can be added to the usability domain as well.

+
"extension_domain":{
+        "scm_extension": {
+            "scm_repository": "https://github.com/example/repo1",
+            "scm_type": "git",
+            "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21",
+            "scm_path": "workflow/hive-viral-mutation-detection.cwl",
+            "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl"
+      }
+}
+

2.3.2.1 SCM Repository “scm_repository”

+

The base url for the SCM repository.

+

2.3.2.2 SCM Type “scm_type”

+

A classifier for the type of SCM database. This field is a list of predefined values. Third-party scm types can be used, and if so the "other" value MUST be used. The options for this field include git (Git, including GitHub/GitLab), svn (Subversion), hg (mercurial) and other.

+

2.3.2.3 SCM Commit “scm_commit”

+

This field is a reference to a revision within the scm repository. This SHOULD be a repository-wide commit identifier (e.g. afba51a222e199f5b58f9d19450f189055e93c44) or the name of a tag (e.g. v1.0.0), but MAY be the name of a branch (e.g. master).

+

2.3.2.4 SCM Path “scm_path”

+

This is the path from the repository to the source code referenced. scm_path should NOT start with /

+

2.3.2.5 SCM Preview “scm_preview”

+

The full uri for the source code referenced by the BioCompute Object.

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/external-references/index.html b/public/external-references/index.html new file mode 100644 index 0000000..92b0df0 --- /dev/null +++ b/public/external-references/index.html @@ -0,0 +1,225 @@ + + + + + + + External References - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

3.2 Appendix-II: External reference database list

+

This list contains the databases that are currently being used in our BCOs. We use the CURIEs that map to URIs maintained by http://identifiers.org/

+
+

"Identifiers.org is an established resolving system that enables the referencing of data for the scientific community, with a current focus on the Life Sciences domain. Identifiers.org provides direct access to the identified data using one selected physical location (or resource). Where multiple physical locations are recorded in the registry the most stable one is selected for resolution. This allows the location-independent referencing (and resolution if required) of data records."

+
+

In the entries below the namespace and identifier combine to become the CURIEs.

+
Recommended name: Taxonomy
+Namespace: taxonomy
+Identifier pattern: ^\d+$
+Registry identifier: MIR:00000006
+URI: http://identifiers.org/taxonomy/
+
+
+Recommended name: Sequence Ontology
+Namespace: so
+Identifier pattern: ^SO:\d{7}$
+Registry identifier: MIR:00000081
+URI: http://identifiers.org/so/
+
+Recommended name: PubMed
+Namespace: pubmed
+Identifier pattern: ^\d+$
+Registry identifier: MIR:00000015
+URI: http://identifiers.org/pubmed/
+
+Recommended name: PubChem-compound
+Namespace: pubchem.compound
+Identifier pattern: ^\d+$
+Registry identifier: MIR:00000034
+URI: http://identifiers.org/pubchem.compound/
+
+

For instance, the inline CURIE [taxonomy:31646] expands to http://identifiers.org/taxonomy/31646 as the namespace taxonomy corresponds to the prefix http://identifiers.org/taxonomy/ to be augmented with the identifier 31646. Resolving the resulting URI will redirect (currently to https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=31646) showing that term [taxonomy:31646] means Hepatitis C virus subtype 1a in the NCBI Taxonomy browser.

+

Note that some identifier patterns result in a repetition when combined with the prefix, e.g. [so:SO:0000667] expands to http://identifiers.org/so/SO:0000667 where so: is the prefix and SO: is part of the Sequence Ontology identifier.

+

References

+

McMurry JA et al: Identifiers for the 21st century: How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data. PLoS Biology 15(6): e2001414. +https://doi.org/10.1371/journal.pbio.2001414

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/images/certification_requirements.png b/public/images/certification_requirements.png new file mode 100644 index 0000000..eb88ff5 Binary files /dev/null and b/public/images/certification_requirements.png differ diff --git a/public/images/favicon.png b/public/images/favicon.png new file mode 100644 index 0000000..3267b2c Binary files /dev/null and b/public/images/favicon.png differ diff --git a/public/images/logo.about.png b/public/images/logo.about.png new file mode 100644 index 0000000..8f948f6 Binary files /dev/null and b/public/images/logo.about.png differ diff --git a/public/images/logo.helix_only.png b/public/images/logo.helix_only.png new file mode 100644 index 0000000..914338a Binary files /dev/null and b/public/images/logo.helix_only.png differ diff --git a/public/index.html b/public/index.html new file mode 100644 index 0000000..82e689d --- /dev/null +++ b/public/index.html @@ -0,0 +1,212 @@ + + + + + + + + BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

The BioCompute Standard

+

Because of the many different ways to organize data, a major goal of the BioCompute project is to build and maintain a formal standard through recognized, accredited standards setting organizations like the Institute of Electrical and Electronics Engineers (IEEE) and the International Organization for Standardization (ISO). A formal, consensus-based standard builds predictability and even more stability into the way in which bioinformatic methods are communicated.

+

The standard, officially known as 2791-2020, has two parts: the standards document and the schema, which is maintained in an open source repository:

+
    +
  • The current version of the standard can be found here.
  • +
  • The schema can be found here.
  • +
+

Since the base BioCompute schema is maintained as an open source repository, it can be cloned and integrated into an organization in unique ways, which allows organizations to build off of this schema to create dependent standards for specific applications. This is similar to the different versions of WiFi based on usage, such as the 802.11a standard for fast speed, but high cost and shorter range, or the 802.11b for slower top speed, but lower cost, etc. — all of which are built on the 802.11 base standard. It can also be used to further extend the schema, such as for handling proprietary, internal content, while still being compatible with the base standard. The open source schema also enables individuals or organizations to suggest changes to be incorporated into future versions of the standard.

+

Citation

+

This standard was originally prepared by The BioCompute Object working group during preparation for the 2017 HTS Computational Standards for Regulatory Sciences Workshop.

+

To reference the BCO standards, please use the following +citation inclusive of the DOI:

+

Simonyan, V., Goecks, J., & Mazumder, R. (2017). Biocompute Objects — A Step towards Evaluation and Validation of Biomedical Scientific Computations. PDA Journal of Pharmaceutical Science and Technology, 71(2), 136–146. doi: 10.5731/pdajpst.2016.006734

+

Support, Community and Contributing

+

To suggest changes to this repository we welcome contributions as a pull request or issue submission.

+

BCO_Specification is licensed under the BSD 3-Clause “New” or “Revised” License

+
+

A permissive license similar to the BSD 2-Clause License, but with a 3rd clause that prohibits others from using the name of the project or its contributors to promote derived products without written consent.

+
+

Mailing List

+

As a subscriber to the BCO mailing list, you can post to it by sending a message to biocomputels@hermes.gwu.edu (using the email address that is subscribed). This list is semi-automated and will send your message for review.

+

To subscribe or unsubscribe, please visit https://hermes.gwu.edu/cgi-bin/wa?A0=BIOCOMPUTELS and click Subscribe or Unsubscribe on the lower right. You can also unsubscribe from the list at any time by sending an email to listserv@hermes.gwu.edu, in which the body says: unsubscribe biocomputels

+

This repository is in support of 2791-2020 - IEEE Approved Draft Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication. Please also see our OSF page or our main page

+ + + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/index.xml b/public/index.xml new file mode 100644 index 0000000..e73374e --- /dev/null +++ b/public/index.xml @@ -0,0 +1,217 @@ + + + + Home on BioCompute Object Documentation + / + Recent content in Home on BioCompute Object Documentation + Hugo -- gohugo.io + en-us + + + /examples/readme/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /examples/readme/ + BCO Examples A repository for BCO example flat files. +Table of Contents: HCV1a - This BCO was developed with the Reproducibility and Interpretation use case in mind. This is the archetypal BCO example and is in the BCO Specification repository. + glycosylation-sites-UniCarbKB - This BCO was developed with the Data integration use case in mind. The full repository is available here + UVP - This BCO was developed with the Accountability use case in mind. + + + + + /release_protocol/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /release_protocol/ + BioCompute Release Protocol Prep + Create a release issue: release_1.4.0. Set freeze date freeze date [1/24/2020]. Branch Release (on or around freeze date) + Ensure all blocking milestone issues have been closed. Merge the latest release into dev and push upstream. Deploy and Test Release + Review issues and ensure they all have a milestones attached. Link Checkout release branch. Run schemas/validate.py on each of the examples in examples/*, updating if necessary. + + + + BCO Best Practice + /best_practices/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /best_practices/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; BioCompute Objects Best Practice General The required domains are defined by the IEEE . However, a BioCompute Object is considered complete when an Error Domain exists. Versioning is allowed, but only if the changes do not affect the workflow or output. 
BCO versioning follows a minor.patch schema, no major versions are allowed (substantial changes result in a new BCO). + + + + BCO Curation SOP + /sop/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /sop/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; BCO Curation SOP Author: BioCompute Consortium Version: 2.0 Effective Date: Aug 2020 Intended audience: authors and developers +The following recommendations are intended to provide guidance on BCO™ creation, versioning, certification and authentication. +BCO IDs and Versioning Intended Audience: BCO authors + BioCompute IDs are used as persistent URLs. A novel usability domain must result in the creation of a new BCO with a new BCO ID. + + + + BCO Domains + /bco-domains/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /bco-domains/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +BCO domains A BCO JSON object is split into different parts, or domains, detailed below. +Condensed example: +{ &#34;spec_version&#34; : &#34;https://w3id.org/biocompute/1.3.0/&#34;, &#34;object_id&#34;: &#34;https://example.com/bco/9487ae7e-c1aa-4a3c-b18f-3d3695b33ace&#34;, &#34;type&#34;: &#34;antiviral_resistance_detection&#34;, &#34;etag&#34;: &#34;584C7FE128717E1712426AB19CAAEA8BC1E27365B54285BBEA1221284C7D3A48&#34;, &#34;provenance_domain&#34;: { }, &#34;usability_domain&#34;: [ ], &#34;extension_domain&#34;:{ &#34;fhir_extension&#34;: [ ], &#34;scm_extension&#34;: { } }, &#34;description_domain&#34;: { }, &#34;execution_domain&#34;: { }, &#34;parametric_domain&#34;: { }, &#34;io_domain&#34;: { }, &#34;error_domain&#34;: { } } 2. 
+ + + + BCO Introduction + /extension-fhir/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /extension-fhir/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.3.1 Extension to External References: SMART on FHIR Genomics The external references example extension to FHIR resource demonstrates how specific data elements can be extracted from EHR systems or other secure FHIR endpoints via technologies such as SMART on FHIR Genomics (https://www.ncbi.nlm.nih.gov/pubmed/26198304) without compromising patient and providers’ information. + + + + BCO Introduction + /introduction/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /introduction/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +1 Introduction to BioCompute Objects BioCompute is a paradigm and a BioCompute Object (BCO) is an instance of that paradigm. High-throughput sequencing (HTS), also referred to as next-generation sequencing (NGS) or massively parallel sequencing (MPS), has increased the pace at which we generate, compute and share genomic data in biomedical sciences. + + + + Description Domain + /description-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /description-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.4 Description Domain &ldquo;description_domain&rdquo; This section defines the fields of the description_domain part of the BCO structure. +Structured field for description of external references, the pipeline steps, and the relationship of I/O objects. Information in this domain is not used for computation. This domain is meant to capture information that is currently being provided in FDA submission in journal format. 
+ + + + Error Domain + /error-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /error-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.8 Error Domain, acceptable range of variability &ldquo;error_domain&rdquo; The error domain can be used to determine what range of input returns outputs that are within the tolerance level defined in this subdomain and therefore can be used to optimize algorithm. It consists of two subdomains: empirical and algorithmic. + + + + Execution Domain + /execution-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /execution-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.5 Execution Domain &ldquo;execution_domain&rdquo; This section defines the execution_domain part of the BCO. +The fields required for execution of the BCO have been encapsulated together in order to clearly separate information needed for deployment, software configuration and running applications in a dependent environment. + + + + Extension to External References: Software Configuration Management (SCM) + /extension-scm/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /extension-scm/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.3.2 Extension to External References: Software Configuration Management (SCM) The external references example extension to a SCM repository demonstrates how a BioCompute Object software source code can be stored/deposited/downloaded. The BCO would contain links to the SCM repository where the information is stored and easily retrieved. 
+ + + + External References + /external-references/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /external-references/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +3.2 Appendix-II: External reference database list This list contains the databases that are currently being used in our BCOs. We use the CURIEs that map to URIs maintained by http://identifiers.org/ + Identifiers.org is an established resolving system the enables the referencing of data for the scientific community, with a current focus on the Life Sciences domain. + + + + I/O Domain + /io-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /io-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.7 Input and Output Domain &ldquo;io_domain&rdquo; This section defines the io_domain part of the BCO. +This represents the list of global input and output files created by the computational workflow, excluding the intermediate files. These fields are pointers to objects that can reside in the system performing the computation or any other accessible system. + + + + Parametric Domain + /parametric-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /parametric-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.6 Parametric Domain &ldquo;parametric_domain&rdquo; This represents the list of NON-default parameters customizing the computational flow which can affect the output of the calculations. These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation. The parametric_domain is not used for running/reproducing a bco e. 
+ + + + Provenance Domain + /provenance-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /provenance-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.1 Provenance Domain &ldquo;provenance_domain&rdquo; This section defines the fields of the provenance_domain part of the BCO structure. +Condensed example: +&#34;provenance_domain&#34;: { &#34;name&#34;: &#34;HCV1a ledipasvir resistance SNP detection&#34;, &#34;version&#34;: &#34;2.9&#34;, &#34;review&#34;: [ ], &#34;obsolete_after&#34; : &#34;2118-09-26T14:43:43-0400&#34;, &#34;embargo&#34; : { }, &#34;created&#34;: &#34;2017-01-24T09:40:17-0500&#34;, &#34;modified&#34;: &#34;2018-09-21T14:06:14-0400&#34;, &#34;contributors&#34;: [ ], &#34;license&#34;: &#34;https://spdx. + + + + Top Level Domains + /top-level/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /top-level/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.0 Top Level Fields These header fields uniquely define this BCO. These fields are required for every BCO and are represented at the top level object. +Condensed example: +{ &#34;spec_version&#34; : &#34;https://w3id.org/ieee/ieee-2791-schema/&#34;, &#34;object_id&#34;: &#34;https://example.com/bco/9487ae7e-c1aa-4a3c-b18f-3d3695b33ace&#34;, &#34;etag&#34;: &#34;d41d8cd98f00b204e9800998ecf8427e&#34;, &#34;provenance_domain&#34;: { }, &#34;...&#34;: { } } 2. + + + + Usability Domain + /usability-domain/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /usability-domain/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document is part of the BioCompute Object User Guide +Back to BCO domains +2.2 Usability Domain &ldquo;usability_domain&rdquo; This section defines the usability_domain part of the BCO structure. 
+This field provides a space for the author to define the usability domain of the BCO. It is an array of free text values that should be consistant with terminology used in the name, external references (xref), and keywords sections. + + + + User Guide + /user_guide/ + Mon, 01 Jan 0001 00:00:00 +0000 + + /user_guide/ + ((window.gitter = {}).chat = {}).options = { room: 'biocompute-objects/BCO_Specification' }; This document was created by the BioCompute Object Consortium members (BCOC) BioCompute Object (BCO) User Guide This version: draft-2.0.0 This version is offerd as support for 2791-2020 - IEEE Approved Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication. Previous version: 1.4.0 +Latest release: https://github.com/biocompute-objects/BCO_Specification/releases/latest Latest editor&rsquo;s draft: https://github.com/biocompute-objects/BCO_Specification/tree/dev Note that unless you are viewing a release this is a draft subject to change. + + + + diff --git a/public/introduction/index.html b/public/introduction/index.html new file mode 100644 index 0000000..6ed954d --- /dev/null +++ b/public/introduction/index.html @@ -0,0 +1,250 @@ + + + + + + + BCO Introduction - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

1 Introduction to BioCompute Objects

+

BioCompute is a paradigm and a BioCompute Object (BCO) is an instance of that paradigm. High-throughput sequencing (HTS), also referred to as next-generation sequencing (NGS) or massively parallel sequencing (MPS), has increased the pace at which we generate, compute and share genomic data in biomedical sciences. As a result, scientists, clinicians and regulators are now faced with a new data paradigm that is less portable, more complex and most of all poorly standardized. BCOs use a simple implementation of the JSON schema to encode important information on the execution of computational pipelines, or for the creation of knowledgebases. BioCompute can be process oriented (for software pipelines) and/or product oriented (for knowledge bases). The goal of using a BCO is to streamline communication of these details between stakeholders in academia, industry and regulatory agencies.

+

The US Food and Drug Administration (FDA) and George Washington University (GW) have partnered to establish a framework for community-based standards development and harmonization of HTS computations and data formats. Standardized HTS data processing and data formats will promote interoperability and simplify the verification of bioinformatics protocols. To do this, a schema has been developed to represent instances of computational analysis as a BCO. A BCO includes:

+
    +
  • Information about parameters and versions of the executable programs in a pipeline
  • +
  • Reference to input and output test data for verification of the pipeline
  • +
  • A usability domain
  • +
  • Keywords
  • +
  • A list of agents involved along with other important metadata, such as their specific contribution
  • +
+

Knowledge of input data is intended to be captured according to existing efforts, including MIRAGE, MIAPE, and STRENDA, and to be in accordance with Minimum Information Standards. In addition to all the information captured in the BCO, the BCO itself must be independent of the execution environment, whether it is a local high-performance or a cloud-based infrastructure.

+

Additional, non-normative, information on BCOs:

+

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5510742/

+

1.1 Mission of the BioCompute project

+
    +
  • Develop BioCompute Objects that will facilitate communication of HTS computational analysis details with the FDA.
  • +
  • Develop a community of stakeholders to create a versatile data harmonization framework that allows the standardized definition of platform-independent bioinformatics pipelines for execution, and is easily read by humans AND machines.
  • +
  • Facilitate the development of tools and facilities implementing data typing, instantiation, deposition, storage, and distribution of validated BioCompute Objects through a BioCompute database, in order to enable reproducible scientific research and regulatory submissions of data and computations.
  • +
  • Facilitate portability of pipelines for execution on Public Cloud infrastructure.
  • +
+

1.2 Motivation

+

The unpredictability of tangible physical, chemical, and biological experiments due to the multitude of environmental and procedural factors is well documented. What is often systematically overlooked is that computational biology algorithms are affected by a multiplicity of parameters and are no less volatile. The complexities of computation protocols and interpretation of outcomes are only part of the challenge; there are also virtually no standardized and industry-accepted metadata schemas for reporting the computational pipelines and parameters together with their results. Thus, it is often impossible to reproduce the results of a previously performed computation due to missing information on parameters, versions, arguments, conditions, and procedures of application launch. The BCO concept has been developed specifically to satisfy regulatory research needs for evaluation, validation, and verification of bioinformatics pipelines; however, there is potential utility of BCO within the larger scientific community. This utility can be increased through the creation of a BCO database comprising records relevant to the U.S. Food and Drug Administration.

+

A BioCompute Object database record will be similar to a GenBank record in form; however, instead of describing a sequence, the BioCompute record will include information related to parameters, dependencies, usage, and other information related to the specific computational instance. This mechanism will extend similar efforts and also serve as a collaborative ground to ensure interoperability between different platforms, industries, scientists, regulators, and other stakeholders interested in biocomputing.

+

For more information, see the project description on the FDA Extramural Research page.

+

1.2.1 Limitations of the initial effort

+
    +
  • At the initial stages of BioCompute development, we address the challenges of HTS (NGS) bioinformatics.
  • +
  • BCOs could very easily be extended to other types of computational analysis, but at this stage, we are limiting our focus to HTS analysis and database creation.
  • +
+

1.3 Audience for this document

+
    +
  • Users performing HTS analysis with a regulatory science perspective
  • +
  • HTS Platform Developers
  • +
  • HTS related standard developers
  • +
+

1.4 Potential Stakeholders for the BioCompute project

+
    +
  • US Food and Drug Administration, as well as other Regulatory Agencies
  • +
  • Medical product manufacturers and their suppliers
  • +
  • Laboratories developing clinical testing protocols
  • +
  • Bioinformatics tool and platform developers who wish to operate in a regulatory environment, including cloud service (PaaS, IaaS, SaaS, FaaS) providers
  • +
  • Journals / Scientific Publishing / peer reviewing process
  • +
  • US National Institutes of Health (NIH) (particularly initiatives such as NCI/ITCR)
  • +
  • Public cloud companies operating in the Life Sciences sector including electronic health record (EHR) systems
  • +
+

1.5 BCO User stories

+

Reproducibility and Interpretation use case

+

A pharmaceutical company is submitting NGS data and the FDA conducts a reanalysis of the data. The reanalysis does not concur with the original results. It can be very lengthy and costly to figure out the location of the discrepancies. Attaching a BioCompute Object with the initial submission would prevent most of the ambiguity surrounding the discrepancies.

+

Reusability use case

+

A regulatory decision has been made where a computational analysis has been used as evidence. New data emerges after the product has been on the market over a year and the regulators cannot reproduce the original environment with the configuration of tools and parameters of pipelines to reanalyze the initial submission data or replicate the initial conclusion.

+

Collaboration use case

+

Authors and pharmaceutical scientists are unaware of how the regulatory industry is using workflows to analyze data. Openness and transparency are hindered by the lack of ability to communicate, not a lack of willingness. Scientific merit is compromised as a result of not having a common “language” for communicating computations.

+

Accountability use case

+

A bioinformatics platform provider can use BCO as part of its verification and validation process. A customer submits NGS data provided by a third party sequencing provider. The sequencing data is of poor quality. Reproducible pipelines, validated and verified as a “BCO”, are used to demonstrate that the fault lies in the sequencing step and not the bioinformatics pipeline.

+

Versioning use case

+

One potential use case related to this is one of ‘differential impact’ of how different choices in the workflow affect the outcome of the computational analysis/experiment (e.g. changing expression estimation procedure).

+

Provenance use case

+

BCOs can serve as a history of what was computed. An example pertaining to provenance, from experience: data are generated and QC’ed as far as possible, and then passed on for analysis. The analysis diagnoses a problem with one or more samples (e.g., cryptic relatedness), which are then locally excluded from the analysis. But that exclusion is not reflected back to the original data, and the same bad samples are included in the next analysis. With a BCO, a record exists of which samples were excluded, so that they can also be excluded in future analyses.

+

Data integration use case

+

A BCO can be used to provide clarity and transparency of the data integration process to both the new and existing collaborators. When new data is integrated into the existing data model, a BCO can be used to describe data source information (e.g., authors/contributors, data version, etc.), a QC workflow, data content, and any data modifications. The BCO also allows reuse of the same workflow to integrate new data with the same structure and source. A BCO also provides a way to access and track data records which were eliminated in the integration/QC process due to rules or restrictions of the existing data model. Knowledgebases using BCOs in the form of ‘readme’ can provide provenance for every piece of data that is collected and presented to the user. Such granular tracking facilitates fair sharing of data and provides mechanisms for adherence to licensing requirements associated with specific datasets.

+

1.6 BCO community

+

The BioCompute Object working group facilitates a means for different stakeholders in the HTS communities to provide input on current practices on the BCO. This working group was formed during preparation for the 2017 HTS Computational Standards for Regulatory Sciences Workshop, and was initially made up of the workshop participants, both speakers and panelists. There has been a continual growth of the BCO working group as a direct result of the interaction between the stakeholders interested in standardization of computational HTS data processing.

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/io-domain/index.html b/public/io-domain/index.html new file mode 100644 index 0000000..f362ecf --- /dev/null +++ b/public/io-domain/index.html @@ -0,0 +1,235 @@ + + + + + + + I/O Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.7 Input and Output Domain “io_domain”

+

This section defines the io_domain part of the BCO.

+

This represents the list of global input and output files created by the computational workflow, excluding the intermediate files. These fields are pointers to objects that can reside in the system performing the computation or any other accessible system. Just like the fields of parametric domain, these fields are expected to vary depending on the specific BCO implementation and can refer to named input/output arguments of underlying pipelines. Please refer to documentation of individual scripts and specific BCO descriptions for further details.

+

Condensed example:

+
"io_domain": {
+        "input_subdomain": [
+        ], 
+        "output_subdomain": [
+        ]
+}, 
+

2.7.1 Input Subdomain “input_subdomain”

+

This field records the references and input files for the entire pipeline. Each input file is listed as a uri object. This allows the author to be very specific about a particular type of input file, if they so choose. For example: reference files have common names, and adding the common name here, in addition to the uri would make this more readable and understandable (e.g., "HCV reference version..." or "human reference GRCH38"). For data integration workflows, the input files can be a table downloaded from a specific source which is then filtered or modified using rules described in the BCO. It is recommended that the values here include filename, uri, and access_time.

+
        "input_subdomain": [
+            {
+                "uri": {
+                    "filename": "Hepatitis C virus genotype 1", 
+                    "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792",
+                    "access_time": "2017-01-24T09:40:17-0500"
+                }
+            }, 
+            {
+                "uri": {
+                    "filename": "Hepatitis C virus type 1b complete genome", 
+                    "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376",
+                    "access_time": "2017-01-24T09:40:17-0500"
+                }
+            }
+        ]
+

2.7.2 Output Subdomain “output_subdomain”

+

This field records the outputs for the entire pipeline. Each output object is represented as a uri with the addition of a mediatype value.

+
        "output_subdomain": [
+            {
+                "mediatype": "text/csv", 
+                "uri": { 
+                    "uri": "http://example.com/data/514769/dnaAccessionBased.csv",
+                    "access_time": "2017-01-24T09:40:17-0500"
+                }
+            },
+            {
+                "mediatype": "text/csv", 
+                "uri": {
+                    "uri": "http://example.com/data/514801/SNPProfile*.csv",
+                    "access_time": "2017-01-24T09:40:17-0500"
+                }
+            }
+        ]
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/js/math-code.js b/public/js/math-code.js new file mode 100644 index 0000000..a435806 --- /dev/null +++ b/public/js/math-code.js @@ -0,0 +1,20 @@ +(function() { + var i, text, code, codes = document.getElementsByTagName('code'); + for (i = 0; i < codes.length;) { + code = codes[i]; + if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) { + text = code.textContent; + if (/^\$[^$]/.test(text) && /[^$]\$$/.test(text)) { + text = text.replace(/^\$/, '\\(').replace(/\$$/, '\\)'); + code.textContent = text; + } + if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) || + /^\$(.|\s)+\$$/.test(text) || + /^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) { + code.outerHTML = code.innerHTML; // remove + continue; + } + } + i++; + } +})(); diff --git a/public/parametric-domain/index.html b/public/parametric-domain/index.html new file mode 100644 index 0000000..19953d0 --- /dev/null +++ b/public/parametric-domain/index.html @@ -0,0 +1,198 @@ + + + + + + + Parametric Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.6 Parametric Domain “parametric_domain”

+

This represents the list of NON-default parameters customizing the computational flow which can affect the output of the calculations. These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation. The parametric_domain is not used for running/reproducing a bco e.g. not used by the execution_domain. It is recommended these fields be generated automatically, but that may not always be possible. Please refer to documentation of individual scripts and specific BCO descriptions for details. While this domain is NOT required, it is recommended that it be populated.

+
"parametric_domain": [
+    {"param": "seed", "value": "14", "step": "1"},
+    {"param":"minimum_match_len", "value": "66", "step": "1"},
+    {"param": "divergence_threshold_percent", "value": "0.30", "step": "1"},
+    {"param": "minimum_coverage", "value": "15", "step": "2"},
+    {"param": "freq_cutoff", "value": "0.10", "step": "2"}
+]	
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/provenance-domain/index.html b/public/provenance-domain/index.html new file mode 100644 index 0000000..27ade4d --- /dev/null +++ b/public/provenance-domain/index.html @@ -0,0 +1,298 @@ + + + + + + + Provenance Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.1 Provenance Domain “provenance_domain”

+

This section defines the fields of the provenance_domain part of the BCO structure.

+

Condensed example:

+
"provenance_domain": {
+        "name": "HCV1a ledipasvir resistance SNP detection", 
+        "version": "2.9",
+        "review": [
+        ],
+        "obsolete_after" : "2118-09-26T14:43:43-0400",
+        "embargo" : {
+	},
+        "created": "2017-01-24T09:40:17-0500", 
+        "modified": "2018-09-21T14:06:14-0400", 
+        "contributors": [
+        ],
+        "license": "https://spdx.org/licenses/CC-BY-4.0.html"
+},
+

2.1.1 Name “name”

+

Name for the BCO. This public field should take free text value using common biological research terminology supporting the terminology used in the usability_domain, external references (xref), and keywords sections.

+
"name": "HCV1a ledipasvir resistance SNP detection"
+

2.1.2 Version “version”

+

Records the versioning of this BCO instance object. Semantic Versioning 2.0.0 describes versioning as follows:

+
+

Given a version number MAJOR.MINOR.PATCH, increment the:

+
    +
  1. MAJOR version when you make incompatible API changes,
  2. +
  3. MINOR version when you add functionality in a backwards-compatible manner, and
  4. +
  5. PATCH version when you make backwards-compatible bug fixes. +Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
  6. +
+
+

BCO versioning should adhere to semantic versioning. Given the above conditions a MAJOR version would qualify for a new BCO, and therefore it is RECOMMENDED that the versioning of a BCO only utilize MINOR and PATCH, or two digits.

+
"version": "2.1",
+

2.1.3 Review “review”

+

This is an array to hold reviewer identifiers and a description of the status of an object in the review process. The subtype reviewer contains field(s) for name, affiliation, email, and the contribution type of the reviewer. To further record author information, ORCID IDs are included as they allow for the author to curate their information after submission. ORCID identifiers must be valid and must have the prefix https://orcid.org/. The contribution type is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the PROV-O.

+

The “status” key describes the status of an object in the review process and the following are the possible values:

+
    +
  • unreviewed flag indicates that the object has been submitted, but no further evaluation or verification has occurred.
  • +
  • in-review flag indicates that verification is underway.
  • +
  • approved flag indicates that the BCO has been verified and reviewed.
  • +
  • suspended flag indicates an object that was once valid is no longer considered valid.
  • +
  • rejected flag indicates that an error or inconsistency was detected in the BCO, and it has been removed or rejected.
  • +
+

The fields from the contributor object (described in section 2.1.9) are used to populate the reviewer section. Each BCO SHOULD have at least one review.

+
        "review": [
+            {
+                "status": "approved",
+                "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer",
+                "date": "2017-11-12T12:30:48-0400",
+                "reviewer": {
+                    "name": "Charles Hadley King", 
+                    "affiliation": "George Washington University", 
+                    "email": "hadley_king@gwu.edu",
+                    "contribution": "curatedBy",
+                    "orcid": "https://orcid.org/0000-0003-1409-4549"
+                }
+            },
+            {
+                "status": "approved",
+                "reviewer_comment": "The revised BCO looks fine",
+                "date": "2017-12-12T12:30:48-0400",
+                "reviewer": {
+                    "name": "Eric Donaldson", 
+                    "affiliation": "FDA", 
+                    "email": "Eric.Donaldson@fda.hhs.gov",
+                    "contribution": "curatedBy"
+                }
+            }
+        ]
+

2.1.4 Inheritance/derivation “derived_from”

+

If the object is derived from another, this field will specify the parent object, in the form of the ‘object_id’. If the object is novel, then the field is not included.

+
"derived_from" : "https://github.com/biocompute-objects/BCO_Specification/blob/1.2.1-beta/HCV1a.json"
+
+

2.1.5 Obsolescence “obsolete_after”

+

If the object has an expiration date this field will specify that using the ‘datetime’ type which is in ISO-8601 format as clarified by W3C https://www.w3.org/TR/NOTE-datetime. This field is optional.

+
"obsolete_after" : "2118-09-26T14:43:43-0400"
+

2.1.6 Embargo “embargo”

+

If the object has a period of time that it is not public, that range can be specified using these fields. Using the datetime type, a start and end time are specified for the embargo. These fields are optional.

+
"embargo" : {
+    "start_time": "2000-09-26T14:43:43-0400",
+    "end_time": "2000-09-26T14:43:45-0400"
+},
+

2.1.7 Created “created”

+

Using the datetime type the time of initial creation of the BCO is recorded in ISO-8601 format as clarified by W3C https://www.w3.org/TR/NOTE-datetime. This field should be readOnly.

+
"created": "2017-01-20T09:40:17-0500"
+

2.1.8 Modification “modified”

+

Using the datetime type, the time of the most recent modification of the BCO is recorded.

+
"modified": "2018-03-21T18:31:48-0400"
+

2.1.9 Contributors “contributors”

+

This is a list to hold contributor identifiers and a description of their type of contribution, including a field for ORCIDs to record author information, as they allow for the author to curate their information after submission. ORCID identifiers must be valid and must have the prefix https://orcid.org/. The contribution type is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the PROV-O.

+
        "contributors": [
+            {
+                "name": "Charles Hadley King", 
+                "affiliation": "George Washington University", 
+                "email": "hadley_king@gwu.edu",
+                "contribution": ["createdBy", "curatedBy"],
+                "orcid": "https://orcid.org/0000-0003-1409-4549"
+            },
+
+            {
+                "name": "Eric Donaldson", 
+                "affiliation": "FDA", 
+                "email": "Eric.Donaldson@fda.hhs.gov",
+                "contribution": ["authoredBy"]
+            }
+        ]
+

2.1.10 License “license”

+

A space for Creative Commons license or other license information (text). The default or recommended license can be Creative Commons Attribution 4.0 International, identified as https://spdx.org/licenses/CC-BY-4.0.html

+
"license": "https://spdx.org/licenses/CC-BY-4.0.html"
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/release_protocol/index.html b/public/release_protocol/index.html new file mode 100644 index 0000000..52bc122 --- /dev/null +++ b/public/release_protocol/index.html @@ -0,0 +1,237 @@ + + + + + + + - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ +

BioCompute Release Protocol

+
    +
  • +

    Prep

    +
      +
    • Create a release issue: release_1.4.0.
    • +
    • Set freeze date freeze date [1/24/2020].
    • +
    +
  • +
  • +

    Branch Release (on or around freeze date)

    + +
  • +
  • +

    Deploy and Test Release

    +
      +
    • Review issues and ensure they all have a milestone attached. Link
    • +
    • Checkout release branch.
    • +
    • Run schemas/validate.py on each of the examples in examples/*, updating if necessary.
    • +
    • Check for obvious missing or inconsistent documentation in release.
    • +
    +
  • +
  • +

    Create Release Notes

    +
      +
    • Open newly edited/created files and manually curate for accuracy.
    • +
    • If any changes are made, ensure you commit branch again and document in commit what was changed.
    • +
    • Run sh scripts/log.sh from the repo root.
    • +
    • Check for obvious missing or inconsistent documentation in release notes (CHANGELOG.md).
    • +
    • Commit branch.
    • +
    +
  • +
  • +

    Do Release

    + +
  • +
  • +

    Announce Release

    + +
  • +
  • +

    Prepare for next release

    +
      +
    • Close milestone ${version} and ensure milestone ${next_version} exists.
    • +
    • Create release issue for next version release_${next_version}.
    • +
    • Close this issue.
    • +
    +
  • +
+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/sitemap.xml b/public/sitemap.xml new file mode 100644 index 0000000..938ed4b --- /dev/null +++ b/public/sitemap.xml @@ -0,0 +1,50 @@ + + + + /examples/readme/ + + /release_protocol/ + 0 + + /best_practices/ + + /sop/ + + /bco-domains/ + + /extension-fhir/ + + /introduction/ + + /categories/ + + /description-domain/ + + /error-domain/ + + /examples/ + + /execution-domain/ + + /extension-scm/ + + /external-references/ + + / + + /io-domain/ + + /parametric-domain/ + + /provenance-domain/ + + /tags/ + + /top-level/ + + /usability-domain/ + + /user_guide/ + + diff --git a/public/sop/index.html b/public/sop/index.html new file mode 100644 index 0000000..46f0414 --- /dev/null +++ b/public/sop/index.html @@ -0,0 +1,239 @@ + + + + + + + BCO Curation SOP - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

BCO Curation SOP

+

Author: BioCompute Consortium

+

Version: 2.0

+

Effective Date: Aug 2020

+

Intended audience: authors and developers

+

The following recommendations are intended to provide guidance on BCO™ creation, versioning, certification and authentication.

+

BCO IDs and Versioning

+

Intended Audience: BCO authors

+
    +
  • BioCompute IDs are used as persistent URLs. A novel usability domain must result in the creation of a new BCO with a new BCO ID. BCO IDs are immutable upon creation, and are never deleted or retired. If the usability domain (UD) remains unchanged, a change to the BCO results in a new version of the existing BCO rather than a new BCO ID. BCO ID example: OMX_000001
  • +
  • BCO major and minor versions can be incremented based on project/institution documented policies.
  • +
  • The BioCompute consortium maintains a database of registered authorities. Registered authorities are able to assign their reserved prefixes to their own IDs in the object_id field, such as OMX_000001. We encourage everyone to register a prefix at biocomputeobject.org.
  • +
+

BioCompute Certification(s) and Authentication

+

Intended Audience: commercial or academic entities looking for additional BCO support

+

Platform certification: A BioCompute “audit” will be conducted by the BioCompute Consortium. +Requirements include:

+
    +
  • IEEE-2791 conformant BCOs can be created
  • +
  • Security (ex: immutable upon creation, secure sharing, platform security)
  • +
  • Data QC processes on input/output
  • +
+

Syntactical certification: Code is available on GitHub for download and use to ensure standard compliance.

+

Scientific certification: BCO consortium members will participate in the certification process; each certification process is projected to take ~ 3 months to 1 year for the development of pipelines. Verification Kit: input and output file(s) (in silico generated), and a Template BCO (tBCO) that includes the error domain.

+

Template and Run Authentication: The Template BCO (tBCO) is created once along with a Verification Kit. The Verification Kit usually includes in silico generated input files, a BCO (with error domain), and output files. Run BCOs (rBCOs) use the tBCOs, and the only changes allowed are in the input (excluding reference files/databases) and output file fields. tBCOs and rBCOs can be authenticated using secure blockchain technology.

+
    +
  • Template certification requirements: Input + output files
  • +
  • Run certification requirements: certified template + run BCO (to confirm that parameters and error domain are within range etc.)
  • +
+
+BioCompute Logo +

+

BCO Metadata

+

The three metadata fields are filled out at the time of submission. The validity check fills in spec_version with the IEEE URL, offers the option to run SHA-256 (or to input your own hash value) for etag, and assigns the object_id (with the option to choose from any prefix associated with the account).

+

Domain-specific guidance

+

Execution domain

+

When recording manual curation, the script field of the execution_domain should link to a Google Document or GitHub markdown that describes the steps, either programmatically or in a stepwise fashion. Manual curation steps should ALSO be properly documented in the description_domain. An easy way to conceptualize this is: Description domain is for people, Execution domain is for machine (or programmers).

+

Extension domain

+

Format of how the schema would be defined: Execution domain

+

Error domain

+

This domain can support a “QA/QC rules” subdomain that provides rules for checking the output: if the output file does not pass the appropriate criteria, it is flagged as an error.

+

BCO Form-based portal

+

Intended Audience: BCO tool developers and authors

+

BCOs can be created using any bioinformatics platform that has BCO read and write functionalities. Users who do not have access to a bioinformatics platform can use the BCO Consortium Editor tool, which has some of the basic API functionalities:

+
    +
  • Create a BCO that is conformant to IEEE-2791.
  • +
  • Upload BCOs in batch mode. The tool runs QA/QC processes on those uploads and creates unique IDs
  • +
  • Search for existing BCOs by author/title/usability/keywords
  • +
  • Download and install an instance within an organization’s firewall
  • +
  • View videos and documentation on tool use
  • +
+

This documentation is currently in the comment phase until Sept. 15, 2020. Please send your comments to Jonathon Keeney.

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/tags/index.html b/public/tags/index.html new file mode 100644 index 0000000..262a894 --- /dev/null +++ b/public/tags/index.html @@ -0,0 +1,178 @@ + + + + + + + Tags - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+

Posts about Tags

+
+
    + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/tags/index.xml b/public/tags/index.xml new file mode 100644 index 0000000..e011f02 --- /dev/null +++ b/public/tags/index.xml @@ -0,0 +1,10 @@ + + + + Tags on BioCompute Object Documentation + /tags/ + Recent content in Tags on BioCompute Object Documentation + Hugo -- gohugo.io + en-us + + diff --git a/public/top-level/index.html b/public/top-level/index.html new file mode 100644 index 0000000..efc9bec --- /dev/null +++ b/public/top-level/index.html @@ -0,0 +1,220 @@ + + + + + + + Top Level Domains - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.0 Top Level Fields

+

These header fields uniquely define this BCO. These fields are required for every BCO and are represented at the top level object.

+

Condensed example:

+
{
+    "spec_version" : "https://w3id.org/ieee/ieee-2791-schema/",
+    "object_id": "https://example.com/bco/9487ae7e-c1aa-4a3c-b18f-3d3695b33ace",
+    "etag": "d41d8cd98f00b204e9800998ecf8427e",    
+    "provenance_domain": {
+    },
+    "...": { }
+}
+

2.0.1 BCO version “spec_version”

+

The version of the BCO specification used to define the BCO. It is recommended that this value be a permalink as defined in the w3id.org/biocompute repository.

+
"spec_version": "https://w3id.org/ieee/ieee-2791-schema/" 
+

2.0.2 BioCompute Object Identifier “object_id”

+

A unique identifier that should be applied to each BCO instance. These can be assigned by a BCO database engine or manually generated. IDs should never be reused. It is RECOMMENDED that the BCO identifier is based on UUIDs (sometimes called GUIDs) to ensure uniqueness, either as a location-independent URN (e.g. urn:uuid:2bf8397b-9aa8-47f2-80a7-235653e8e824) or as part of an identifier permalink, (e.g. http://repo.example.com/bco/2bf8397b-9aa8-47f2-80a7-235653e8e824). While the UUID is the preferred method, IDs expressed as a URN or URL are also acceptable.

+
"object_id": "https://w3id.org/biocompute/1.3.0/examples/HCV1a.json"
+

2.0.3 ETag “etag”

+

A string-type, read-only value, protecting the object from internal or external alterations without proper validation. The string should be generated through the use of a SHA-256 hash function. Everything EXCEPT for the etag, object_id and spec_version should be included in the generation of the hash. For example:

+
    "provenance_domain": {},
+    "usability_domain": [],
+    "extension_domain":{},
+    "description_domain": {},
+    "execution_domain": {}, 
+    "parametric_domain": [], 
+    "io_domain": {},
+    "error_domain": {}
+

will generate the following:

+
"etag": "584C7FE128717E1712426AB19CAAEA8BC1E27365B54285BBEA1221284C7D3A48"
+

Additional domains

+

Additional description about the BCO itself is also provided in the provenance domain, description domain and usability domain. Other domains detail areas like execution and error ranges.

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/usability-domain/index.html b/public/usability-domain/index.html new file mode 100644 index 0000000..53bffaf --- /dev/null +++ b/public/usability-domain/index.html @@ -0,0 +1,207 @@ + + + + + + + Usability Domain - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+

This document is part of the BioCompute Object User Guide

+

Back to BCO domains

+

2.2 Usability Domain “usability_domain”

+

This section defines the usability_domain part of the BCO structure.

+

This field provides a space for the author to define the usability domain of the BCO. It is an array of free text values that should be consistent with terminology used in the name, external references (xref), and keywords sections. The usability_domain can accept template language to indicate values from the external_references. The template takes the form of:

+
    +
  • (SNP)[SO:0000694]
  • +
+

where ($term) and [$identifier] are an entry in the external_references section.

+

This field is to aid in search-ability and provide a specific scientific use case and a description of the function of the object. The usability domain along with keywords can help determine when and how the BCO can be used. It is recommended that a novel use of a specific BCO would result in the creation of a new entry with a new usability domain.

+
    "usability_domain": [
+
+        "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", 
+
+        "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", 
+
+        "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", 
+
+        "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20"
+]
+
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/user_guide/index.html b/public/user_guide/index.html new file mode 100644 index 0000000..24fa2be --- /dev/null +++ b/public/user_guide/index.html @@ -0,0 +1,271 @@ + + + + + + + User Guide - BioCompute Object Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+BioCompute Logo +
+
+
This document was created by the BioCompute Object Consortium members (BCOC)
+

BioCompute Object (BCO) User Guide

+
+

This version: draft-2.0.0 +
This version is offered as support for 2791-2020 - IEEE Approved Standard for Bioinformatics Computations and Analyses Generated by High-Throughput Sequencing (HTS) to Facilitate Communication.
+Previous version: 1.4.0
+Latest release: https://github.com/biocompute-objects/BCO_Specification/releases/latest
+Latest editor’s draft: https://github.com/biocompute-objects/BCO_Specification/tree/dev

+
+

Note that unless you are viewing a release this is a draft subject to change.

+

Table of contents:

+ +
+

1 Introduction

+

This document specifies the structure of BioCompute Objects. +The specification is split into multiple parts linked to from this +top-level document and is maintained in a +GitHub repository +where contributions are welcome.

+

Read more:

+ +
+

2 BioCompute Domains

+

BCOs are represented in JSON (JavaScript Object Notation) formatted text, adhering to JSON schema draft-07. The JSON format was chosen because it is both human and machine readable/writable. For a detailed description of JSON see www.json.org.

+

BioCompute data types are defined as aggregates of the critical fields organized into the following domains: the provenance domain, the usability domain, the extension domain, the description domain, the execution domain, the parametric domain, the input and output domains, and the error domain. At the time of creation with actual values compliant to the schema the BCO should be assigned a unique identifier, an object_id. The object could then be assigned a unique digital etag.

+

Three of the domains in a BioCompute Object SHOULD become immutable upon assignment of the digital etag:

+
    +
  1. the Parametric Domain
  2. +
  3. the Execution Domain and
  4. +
  5. the I/O Domain
  6. +
+ +
+

3 Appendices

+

3.1 Appendix-I: BCO expanded view example

+

Complete example:

+ +

3.2 Appendix-II: External reference database list

+

CURIEs (short identifiers) like [taxonomy:31646] in BCOs can be expanded to complete identifiers.

+

Specifications:

+ +

3.3 Title 21 CFR Part 11

+

Code of Federal Regulations Title 21 Part 11: Electronic Records - Electronic Signatures

+

BioCompute project is being developed with Title 21 CFR Part 11 compliance in mind. The digital signatures incorporated into the format will provide the basis for provenance of BioCompute Object integrity using NIST proposed encryption algorithms. Execution domain and parametric domain (that have a potential impact on a result of computation) and identity domain will be used to create hash values and digital signature encryption keys which later can be used for computer or human validation of transmitted objects.

+

Discussions are now taking place to consider relevance of BioCompute Objects with relation to Title 21 CFR part 11. We encourage continuous input from BioCompute stakeholders on this subject now and while the concept is becoming more mature and more widely accepted by scientific and regulatory communities.

+
+

Relevant document link: Part 11: Electronic Records

+
+

3.4 Appendix IV - Compatibility

+

3.4.1 ISA for the experimental metadata

+

ISA is a metadata framework to manage an increasingly diverse set of life science, environmental and biomedical experiments that employ one or a combination of technologies. Built around the Investigation (the project context), Study (a unit of research) and Assay (analytical measurements) concepts, ISA helps to provide rich descriptions of experimental metadata (i.e. sample characteristics, technology and measurement types, sample-to-data relationships) so that the resulting data and discoveries are reproducible and reusable. The ISA Model and Serialization Specifications define an Abstract Model of the metadata framework that has been implemented in two format specifications, ISA-Tab and ISA-JSON (http://isa-tools.org/format/specification), both of which have supporting tools and services associated with them, including a programmable Python API (http://isa-tools.org) and a varied user community and contributors (http://www.isacommons.org). ISA focuses on structuring experimental metadata; raw and derived data files, codes, workflows etc are considered external files that are referenced. An example, along with its complementarity with other models and a computational workflow, is illustrated in this paper, which shows how to explicitly declare elements of experimental design, variables, and findings: http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0127612

+

3.5 Appendix VI Acknowledgements

+

This document began development during the 2017 HTS-CSRS workshop. The discussion during the workshop led to the refinement and completion of this document. The workshop participants were a major part of the initial BCO community, and the comments and suggestions collected during the sessions were incorporated into this document. The people who participated in the 2017 workshop, and therefore made significant contributions are listed here: https://osf.io/h59uh/

+

BioCompute Object Consortium members (BCOC)

+

FDA: Vahan Simonyan, Mark Walderhaug, Ruth Bandler, Eric Donaldson, Elaine Thompson, Alin Voskanian, Anton Golikov, Konstantinos Karagiannis, Elaine Johanson, Adrian Myers, Errol Strain, Khaled Bouri, Tong Weida, Wenming Xiao, Md Shamsuzzaman

+

GW: Raja Mazumder, Charles Hadley S. King IV, Amanda Bell, Jeet Vora, Krista M. Smith, Robel +Kahsay

+

Documentation Community: Gil Alterovitz (Boston Children’s Hospital/Harvard Medical School, SMART/FHIR/HL7, GA4GH), Michael R. Crusoe (CWL), Marco Schito (C-Path), Konstantinos +Krampis (CUNY), Alexander (Sasha) Wait Zaranek (Curoverse), John Quackenbush (DFCI/Harvard), Geet Duggal (DNAnexus), Singer Ma (DNAnexus), Yuching Lai (DDL), Warren Kibbe (Duke), Tony Burdett (EBI), Helen Parkinson (EBI), Stuart Young (Engility Corp), Anupama Joshi (Epinomics), Vineeta Agarwala (Flatiron Health), James Hirmas (GenomeNext), David Steinberg (UCSC), Veronica Miller (HIV Forum), Dan Taylor (Internet 2), Paul Duncan (Merck), Jianchao Yao (Merck & Co., Inc., Boston, MA USA), Marilyn Matz (Paradigm4), Ben Busby (NCBI), Eugene Yaschenko (NCBI), Zhining Wang (NCI), Hsinyi (Steve) Tsang (NCI), Durga Addepalli (NCI/Attain), Heidi Sofia (NIH), Scott Jackson (NIST), Paul Walsh (NSilico Life Science), Toby Bloom (NYGC), Hiroki Morizono (CNMC), Jeremy Goecks (Oregon Health and Science University), Srikanth Gottipati (Otsuka-US), Alex Poliakov (Paradigm4), Keith Nangle (Pistoia Alliance), Jonas S Almeida (Stony Brook Univ, SUNY), Dennis A. Dean, II (Seven Bridges Genomics), Dustin Holloway (Seven Bridges Genomics), Nisha Agarwal (Solvuu), Stian Soiland-Reyes (UNIMAN), Carole Goble (UNIMAN), Susanna-Assunta Sansone (University of Oxford), Philippe Rocca-Serra (University of Oxford), Phil Bourne (Univ. of Virginia), Joseph Nooraga (Fred Hutchinson Cancer Research Center)

+ + + +
+
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/schemas/README.MD b/schemas/README.MD deleted file mode 100755 index c5b9357..0000000 --- a/schemas/README.MD +++ /dev/null @@ -1,3 +0,0 @@ -# BioCompute Objects Schema -## version 1.3.0-alpha - diff --git a/schemas/extension_domain/fhir_extension.json b/schemas/extension_domain/fhir_extension.json deleted file mode 100644 index 9cdc7fb..0000000 --- a/schemas/extension_domain/fhir_extension.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/extension_domain/fhir_extension.json", - "type": "object", - "required": [ - "fhir_endpoint", - "fhir_version", - "fhir_resources" - ], - "properties": { - "fhir_endpoint": { - "type": "string", - "description": "Base URI of FHIR server where the resources are stored", - "examples": [ - "http://fhirtest.uhn.ca/baseDstu3" - ], - "format": "uri" - }, - "fhir_version": { - "type": "string", - "description": "FHIR version of the server endpoint" - }, - "fhir_resources": { - "type": "array", - "items": { - "type": "object", - "required": [ - "fhir_resource", - "fhir_id" - ], - "properties": { - "fhir_resource": { - "type": "string", - "description": "Type of FHIR resource used" - }, - "fhir_id": { - "type": "string", - "description": "Server-specific identifier string" - } - } - } - } - } -} \ No newline at end of file diff --git a/schemas/extension_domain/scm_extension.json b/schemas/extension_domain/scm_extension.json deleted file mode 100644 index 577fcd0..0000000 --- a/schemas/extension_domain/scm_extension.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://www.w3id.org/biocompute/1.3.0/schemas/extension_domain/scm_extension.json", - "type": "object", - "required": [ - "scm_repository", - "scm_type", - "scm_commit", - "scm_path" - ], - "properties": { - "scm_repository": { - "type": "string", - 
"examples": [ - "https://github.com/example/repo1" - ], - "format": "uri" - }, - "scm_type": { - "type": "string", - "enum": [ - "git", - "svn", - "hg", - "other" - ] - }, - "scm_commit": { - "type": "string", - "examples": [ - "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21" - ] - }, - "scm_path": { - "type": "string", - "examples": [ - "workflow/hive-viral-mutation-detection.cwl" - ], - "format": "string" - }, - "scm_preview": { - "type": "string", - "examples": [ - "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - ], - "format": "uri" - } - } -} \ No newline at end of file diff --git a/scripts/change_log.py b/scripts/change_log.py new file mode 100755 index 0000000..a888251 --- /dev/null +++ b/scripts/change_log.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +################################################################################ + ##change_log.py## +"""change_log.py""" +################################################################################ + +import calendar +import datetime +import json +import os +import string +import re +import sys +from string import Template +import argparse + +PROJECT_DIRECTORY = os.path.join(os.path.dirname(__file__), os.pardir) + +TEMPLATE_TOP = Template(""" + +BioCompute Object Specification Release Notes +=========================================================== +## Release v ${release} ${month_name} ${year} + +""") + +TEMPLATE_CONTENTS = string.Template(""" +------------------------------------------------------------ +### Subject: ${subject} + +Commit Hash: ${commit} + +Author & Email: ${author} ${email} + +Commit Date and Time: ${date} + +#### Message: + + ${message} + +Changes +""") + +TEMPLATE_STATS = Template(""" + +- FILE: ${path} + + Deletions ${deletions} Insertions ${insertions} + +""") + + +TEMPLATE_BOTTOM = Template(""" + +============================================================ + +""") 
#______________________________________________________________________________#
def create_arg_parser():
    """
    Creates and returns the ArgumentParser object.

    The parser accepts a single option, ``-b/--branch`` (default ``'dev'``).
    """

    parser = argparse.ArgumentParser(description='Parses git log output object for change-log generation.')

    parser.add_argument('-b', '--branch', default='dev',
                        help='current git branch checked out')

    return parser
#______________________________________________________________________________#
def load_logs(log_path='./scripts/git-log.json', stat_path='./scripts/git-stat.json'):
    """
    Loads the commit log and the per-commit file statistics and merges them.

    Args:
        log_path: JSON file holding a list of commit dicts; each dict must
            carry a 'commit' key. Defaults to the file under ./scripts/
            (relative to the current working directory).
        stat_path: JSON file holding a dict that maps a commit hash to a list
            of per-file stat dicts. Defaults to the file under ./scripts/.

    Returns:
        The list of commit dicts, modified in place so that every entry has
        a non-empty 'message' ('NA' when it was missing or empty) and a
        'stats' list (empty when the commit has no entry in the stat file).
    """

    # Plain 'r' mode: the legacy 'U' flag was removed in Python 3.11 and makes
    # open() raise ValueError there; JSON parsing does not need it anyway.
    with open(log_path, 'r') as log_file:
        log = json.load(log_file)
    with open(stat_path, 'r') as stat_file:
        stat = json.load(stat_file)
    for entry in log:
        if not entry.get('message'):
            entry['message'] = 'NA'
        if entry['commit'] in stat:
            entry['stats'] = stat[entry['commit']]
        else:
            # Guarantee the key so release_contents() can iterate safely.
            entry.setdefault('stats', [])
    return log
#______________________________________________________________________________#
def release_contents( release, log ):
    """
    Renders the per-commit body of the change log.

    Args:
        release: value substituted for ${release} in the header template
            (main() passes the branch name).
        log: list of commit dicts as returned by load_logs().

    Returns:
        A (template_params, contents) tuple: the parameters for the top and
        bottom templates (month_name, year, day, release), and the
        concatenated TEMPLATE_CONTENTS / TEMPLATE_STATS text for every commit.
    """

    today = datetime.date.today()
    template_params = dict(
        month_name="{:%B}".format(today),
        year=today.year,
        day="{:%d}".format(today),
        release=release
    )

    contents = ''
    for entry in log:
        contents += TEMPLATE_CONTENTS.safe_substitute(entry)
        # A commit without recorded stats simply contributes no file lines.
        for file_stat in entry.get('stats', []):
            contents += TEMPLATE_STATS.safe_substitute(file_stat)

    return template_params, contents
#______________________________________________________________________________#
def main( ):
    """
    Command-line entry point: builds the change log and writes it to
    docs/CHANGELOG.md (path relative to the current working directory).

    The branch given with -b/--branch is used as the release label.
    """
    arg_parser = create_arg_parser()
    parsed_args = arg_parser.parse_args(sys.argv[1:])
    log = load_logs()
    template_params, contents = release_contents(parsed_args.branch, log)
    release_issue_contents = TEMPLATE_TOP.safe_substitute(**template_params)
    release_issue_contents += contents
    release_issue_contents += TEMPLATE_BOTTOM.safe_substitute(**template_params)

    with open('docs/CHANGELOG.md', 'w') as change_log:
        change_log.write(release_issue_contents)

+#______________________________________________________________________________# +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/git-log.json b/scripts/git-log.json new file mode 100644 index 0000000..7881517 --- /dev/null +++ b/scripts/git-log.json @@ -0,0 +1 @@ +[{ "commit": "a147cc6a45449098849fdf602d2ffa669fd04049", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Tue Jan 21 05:56:53 2020 -0500", "subject": "Deleted-IEEE-files", "message": "Deleted IEEE files and updated the scripts files for change log creation "}, { "commit": "0aae6a81b096af8f2551bc8b5df9fc965a8d283a", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Sun Jan 19 09:09:45 2020 -0500", "subject": "Remove-IEEE_Docs", "message": "Remove IEEE_Docs and update to scripts/log.sh "}, { "commit": "3787b477413723dba89b70c7ae41592a6c18af40", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Sat Jan 18 23:35:07 2020 -0500", "subject": "Add-CHANGELOG.md-and-log.sh", "message": "Add scripts to generate CHANGELOG.md "}, { "commit": "732272dc81965698eaac0f9aa6e31a7ff465c910", "author": "Hadley King", "email": "HadleyKing@users.noreply.github.com", "date": "Thu Jan 16 16:52:23 2020 -0500", "subject": "Merge-branch-dev-into-1.4.0", "message": ""}, { "commit": "bbd7715809c4034191d5387b56c7bd1ac253981f", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Thu Jan 16 16:33:53 2020 -0500", "subject": "tsting", "message": ""}, { "commit": "f06ebddcbc9b3b73d91165eb0bb709a471bb2ce6", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Thu Jan 16 16:32:38 2020 -0500", "subject": "test", "message": "test "}, { "commit": "d1113f6185d972a888033575c69e247a0e871187", "author": "HadleyKing", "email": "HadleyKing@users.noreply.github.com", "date": "Thu Jan 16 15:45:44 2020 -0500", "subject": "Update-README.md", "message": ""}, { "commit": "c0f79bc646692c8cd9bf050d4214df9e936a3c68", "author": "HadleyKing", "email": 
"hadley_king@gwu.edu", "date": "Thu Dec 19 12:25:19 2019 -0500", "subject": "git-add-docs-release_protocol.md", "message": ""}, { "commit": "c81160f0a494c0af9f7fb55385b9892170b3954b", "author": "HadleyKing", "email": "hadley_king@gwu.edu", "date": "Tue Nov 5 16:52:34 2019 -0500", "subject": "Updated-field-names", "message": "updated bco_id checksum bco_spec_version to object_id spec_version etag "}, { "commit": "6f8ef27a281408c2a11532a501d138aeb68c727a", "author": "hadley", "email": "hadley_king@gwu.edu", "date": "Tue Aug 13 13:47:06 2019 -0400", "subject": "Typo-67", "message": "Fixes #67 , more links repaired "}, { "commit": "68975cf23b57f3c5dae4d5e65ec75544bff1819a", "author": "hadley", "email": "hadley_king@gwu.edu", "date": "Tue Aug 13 13:42:24 2019 -0400", "subject": "Broken-Links", "message": "Mended broken links from document reorder "}, { "commit": "157db361625e2b9f71405452de8bb3c3c0718c42", "author": "hadley", "email": "hadley_king@gwu.edu", "date": "Tue Aug 13 13:25:29 2019 -0400", "subject": "Updated-docs", "message": "Updated the document structure to make the repository cleaner "}] \ No newline at end of file diff --git a/scripts/git-stat.json b/scripts/git-stat.json new file mode 100644 index 0000000..5534af6 --- /dev/null +++ b/scripts/git-stat.json @@ -0,0 +1 @@ +{"a147cc6a45449098849fdf602d2ffa669fd04049": [ {"insertions": "0", "deletions": "395", "path":"HCV1a.json"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Draft"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/EMB"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/IEEE"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/IEEE"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/IEEE"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/IEEE"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/LC_Sponsor_Ballot_Overview_1July2016.ppt"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_Agenda.doc"}, 
{"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_EMB"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_Minutes.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_P2791_PAR_Detail.pdf"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_Roberts-Rules-of-Order.pdf"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_VotingRoster_Original.docx"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018August29/08292018_Welcome"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018December03/12032018_Agenda.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018December03/12032018_Minutes.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018October22/10222018_Agenda.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018October22/10222018_Minutes.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2018October22/10222018_P2791_Draft_Standard.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2019May09/05092019_Agenda.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2019May09/05092019_Minutes.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2019May09/Comment"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/2019May09/Individual_Roster_Public.xlsx"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/Meetings/Individual_Roster_Public.xlsx"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/P2971_D3_Dec2018.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/P2971_D3_Dec2018_JGK.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/P2971_D3_Dec2018_JGK_Revised.doc"}, {"insertions": "-", "deletions": "-", 
"path":"IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C.doc"}, {"insertions": "-", "deletions": "-", "path":"IEEE_Docs/P2971_D3_Dec2018_JGK_Revised_TrackChanges_C_OpenSourceNotice.doc"}, {"insertions": "0", "deletions": "11", "path":"IEEE_Docs/README.md"}, {"insertions": "0", "deletions": "337", "path":"IEEE_Docs/standard.md"}, {"insertions": "125", "deletions": "5", "path":"docs/CHANGELOG.md"}, {"insertions": "0", "deletions": "231", "path":"docs/CHANGELOG_draft.md"}, {"insertions": "71", "deletions": "43", "path":"scripts/change_log.py"}, {"insertions": "1", "deletions": "1", "path":"scripts/git-log.json"}, {"insertions": "1", "deletions": "1", "path":"scripts/git-stat.json"}, {"insertions": "5", "deletions": "3", "path":"scripts/log.sh"}], "0aae6a81b096af8f2551bc8b5df9fc965a8d283a": [ {"insertions": "173", "deletions": "126", "path":"examples/HCV1a.json"}, {"insertions": "2", "deletions": "0", "path":"scripts/log.sh"}], "3787b477413723dba89b70c7ae41592a6c18af40": [ {"insertions": "319", "deletions": "0", "path":"docs/CHANGELOG.md"}, {"insertions": "231", "deletions": "0", "path":"docs/CHANGELOG_draft.md"}, {"insertions": "1", "deletions": "2", "path":"docs/release_protocol.md"}, {"insertions": "98", "deletions": "0", "path":"scripts/change_log.py"}, {"insertions": "1", "deletions": "0", "path":"scripts/git-log.json"}, {"insertions": "1", "deletions": "0", "path":"scripts/git-stat.json"}, {"insertions": "5", "deletions": "0", "path":"scripts/log.sh"}], "732272dc81965698eaac0f9aa6e31a7ff465c910": [ ], "bbd7715809c4034191d5387b56c7bd1ac253981f": [ {"insertions": "2", "deletions": "1", "path":"docs/release_protocol.md"}], "f06ebddcbc9b3b73d91165eb0bb709a471bb2ce6": [ {"insertions": "4", "deletions": "0", "path":"docs/release_protocol.md"}], "d1113f6185d972a888033575c69e247a0e871187": [ {"insertions": "1", "deletions": "1", "path":"README.md"}], 
"c0f79bc646692c8cd9bf050d4214df9e936a3c68": [ {"insertions": "20", "deletions": "0", "path":"docs/release_protocol.md"}], "c81160f0a494c0af9f7fb55385b9892170b3954b": [ {"insertions": "174", "deletions": "127", "path":"HCV1a.json"}, {"insertions": "3", "deletions": "3", "path":"docs/bco-domains.md"}, {"insertions": "1", "deletions": "1", "path":"docs/provenance-domain.md"}, {"insertions": "10", "deletions": "10", "path":"docs/top-level.md"}, {"insertions": "2", "deletions": "2", "path":"docs/user_guide.md"}, {"insertions": "2", "deletions": "2", "path":"examples/HCV1a.json"}, {"insertions": "2", "deletions": "2", "path":"examples/HIVE_metagenomics.json"}, {"insertions": "2", "deletions": "2", "path":"examples/UVP.json"}, {"insertions": "2", "deletions": "2", "path":"examples/glycosylation-sites-UniCarbKB.json"}, {"insertions": "29", "deletions": "25", "path":"schemas/biocomputeobject.json"}, {"insertions": "2", "deletions": "2", "path":"schemas/provenance_domain.json"}], "6f8ef27a281408c2a11532a501d138aeb68c727a": [ {"insertions": "1", "deletions": "1", "path":"HCV1a.json"}, {"insertions": "1", "deletions": "1", "path":"docs/user_guide.md"}], "68975cf23b57f3c5dae4d5e65ec75544bff1819a": [ {"insertions": "5", "deletions": "1", "path":"README.md"}, {"insertions": "6", "deletions": "6", "path":"docs/user_guide.md"}], "157db361625e2b9f71405452de8bb3c3c0718c42": [ {"insertions": "14", "deletions": "14", "path":"README.md"}, {"insertions": "0", "deletions": "0", "path":"bco-domains.md"}, {"insertions": "0", "deletions": "0", "path":"description-domain.md"}, {"insertions": "0", "deletions": "0", "path":"error-domain.md"}, {"insertions": "0", "deletions": "0", "path":"execution-domain.md"}, {"insertions": "0", "deletions": "0", "path":"extension-fhir.md"}, {"insertions": "0", "deletions": "0", "path":"extension-scm.md"}, {"insertions": "0", "deletions": "0", "path":"external-references.md"}, {"insertions": "0", "deletions": "0", "path":"introduction.md"}, {"insertions": "0", 
"deletions": "0", "path":"io-domain.md"}, {"insertions": "0", "deletions": "0", "path":"parametric-domain.md"}, {"insertions": "0", "deletions": "0", "path":"provenance-domain.md"}, {"insertions": "0", "deletions": "0", "path":"top-level.md"}, {"insertions": "0", "deletions": "0", "path":"usability-domain.md"}, {"insertions": "0", "deletions": "0", "path":"user_guide.md"}, {"insertions": "0", "deletions": "0", "path":"validate.py"}]} \ No newline at end of file diff --git a/scripts/log.sh b/scripts/log.sh new file mode 100755 index 0000000..23c9f23 --- /dev/null +++ b/scripts/log.sh @@ -0,0 +1,9 @@ +#!/bin/sh -l + +branch_name=$(git symbolic-ref --short -q HEAD) + +git log main..$branch_name --pretty=format:"{ 'commit': '%H', 'author': '%aN', 'email': '%aE', 'date': '%ad', 'subject': '%f', 'message': '%b'}," $@ | sed -e "s/\"//g"| sed -e "s/'/\"/g" | tr '\n\r' ' '| perl -pe 'BEGIN{print "["}; END{print "]"}' | perl -pe 's/}, ]/}]/' > ./scripts/git-log.json + +git log main..$branch_name --numstat --format='%H' $@ | perl -lawne ' if (defined $F[1]) { print qq#{"insertions": "$F[0]", "deletions": "$F[1]", "path":"$F[2]"},# } elsif (defined $F[0]) { print qq#],\n"$F[0]": [# }; END{print qq#],#}' | tail -n +2 | perl -wpe 'BEGIN{print "{"}; END{print "}"}' | tr '\n\r' ' ' | perl -wpe 's#(]|}),\s*(]|})#$1$2#g' | perl -wpe 's#,\s*?}$#}#' > ./scripts/git-stat.json + +python ./scripts/change_log.py -b $branch_name \ No newline at end of file diff --git a/schemas/validate.py b/scripts/validate.py similarity index 98% rename from schemas/validate.py rename to scripts/validate.py index 0619423..76e6d60 100755 --- a/schemas/validate.py +++ b/scripts/validate.py @@ -6,7 +6,6 @@ """Used to test a BCO against the schema. 
The following commands were used to run the script: cd BCO_specification/ - python -m venv env source env/bin/activate pip install jsonschema jsonref python validate.py HCV1a.json $PWD/schemas/biocomputeobject.json diff --git a/static/_redirects b/static/_redirects new file mode 100644 index 0000000..e69de29 diff --git a/static/docs/BCOCheatSheet.pdf b/static/docs/BCOCheatSheet.pdf new file mode 100644 index 0000000..6b43af6 Binary files /dev/null and b/static/docs/BCOCheatSheet.pdf differ diff --git a/static/docs/ReviewerWorkshop_24June2020_Deck.pdf b/static/docs/ReviewerWorkshop_24June2020_Deck.pdf new file mode 100644 index 0000000..a627452 Binary files /dev/null and b/static/docs/ReviewerWorkshop_24June2020_Deck.pdf differ diff --git a/static/images/Almeida.jpg b/static/images/Almeida.jpg new file mode 100644 index 0000000..0a9c098 Binary files /dev/null and b/static/images/Almeida.jpg differ diff --git a/static/images/Alterovitz.png b/static/images/Alterovitz.png new file mode 100755 index 0000000..62089c5 Binary files /dev/null and b/static/images/Alterovitz.png differ diff --git a/static/images/Crusoe.jpg b/static/images/Crusoe.jpg new file mode 100644 index 0000000..e5c8c23 Binary files /dev/null and b/static/images/Crusoe.jpg differ diff --git a/static/images/Dean.png b/static/images/Dean.png new file mode 100644 index 0000000..4f11e27 Binary files /dev/null and b/static/images/Dean.png differ diff --git a/static/images/Goble.png b/static/images/Goble.png new file mode 100755 index 0000000..ec056a4 Binary files /dev/null and b/static/images/Goble.png differ diff --git a/static/images/Goecks.png b/static/images/Goecks.png new file mode 100644 index 0000000..f3a7785 Binary files /dev/null and b/static/images/Goecks.png differ diff --git a/static/images/Golikov.jpg b/static/images/Golikov.jpg new file mode 100644 index 0000000..c4605df Binary files /dev/null and b/static/images/Golikov.jpg differ diff --git a/static/images/Karagiannis.jpg 
b/static/images/Karagiannis.jpg new file mode 100644 index 0000000..8b4265d Binary files /dev/null and b/static/images/Karagiannis.jpg differ diff --git a/static/images/Keeney.png b/static/images/Keeney.png new file mode 100644 index 0000000..cf369de Binary files /dev/null and b/static/images/Keeney.png differ diff --git a/static/images/King.png b/static/images/King.png new file mode 100644 index 0000000..3880230 Binary files /dev/null and b/static/images/King.png differ diff --git a/static/images/Krampis.jpg b/static/images/Krampis.jpg new file mode 100644 index 0000000..5c9826e Binary files /dev/null and b/static/images/Krampis.jpg differ diff --git a/static/images/Mazumder.png b/static/images/Mazumder.png new file mode 100755 index 0000000..fb9439c Binary files /dev/null and b/static/images/Mazumder.png differ diff --git a/static/images/Patel.jpg b/static/images/Patel.jpg new file mode 100644 index 0000000..3465efb Binary files /dev/null and b/static/images/Patel.jpg differ diff --git a/static/images/Simonyan.png b/static/images/Simonyan.png new file mode 100755 index 0000000..19b17b4 Binary files /dev/null and b/static/images/Simonyan.png differ diff --git a/static/images/Soiland-Reyes.jpg b/static/images/Soiland-Reyes.jpg new file mode 100644 index 0000000..2c8d0e9 Binary files /dev/null and b/static/images/Soiland-Reyes.jpg differ diff --git a/static/images/Soranzo.jpg b/static/images/Soranzo.jpg new file mode 100644 index 0000000..efe6d27 Binary files /dev/null and b/static/images/Soranzo.jpg differ diff --git a/static/images/Taylor.jpg b/static/images/Taylor.jpg new file mode 100644 index 0000000..928132a Binary files /dev/null and b/static/images/Taylor.jpg differ diff --git a/static/images/Thompson.jpg b/static/images/Thompson.jpg new file mode 100644 index 0000000..b4cb5b5 Binary files /dev/null and b/static/images/Thompson.jpg differ diff --git a/static/images/Travis.jpg b/static/images/Travis.jpg new file mode 100644 index 0000000..e7e4fa7 Binary files 
/dev/null and b/static/images/Travis.jpg differ diff --git a/static/images/about.2.png b/static/images/about.2.png new file mode 100644 index 0000000..c1e3cd8 Binary files /dev/null and b/static/images/about.2.png differ diff --git a/static/images/about.3.png b/static/images/about.3.png new file mode 100644 index 0000000..2c97b93 Binary files /dev/null and b/static/images/about.3.png differ diff --git a/static/images/about.4.png b/static/images/about.4.png new file mode 100644 index 0000000..b498cfd Binary files /dev/null and b/static/images/about.4.png differ diff --git a/static/images/about.8.png b/static/images/about.8.png new file mode 100644 index 0000000..9466de3 Binary files /dev/null and b/static/images/about.8.png differ diff --git a/static/images/certification_requirements.png b/static/images/certification_requirements.png new file mode 100644 index 0000000..eb88ff5 Binary files /dev/null and b/static/images/certification_requirements.png differ diff --git a/static/images/clinical.1.png b/static/images/clinical.1.png new file mode 100755 index 0000000..4c47d64 Binary files /dev/null and b/static/images/clinical.1.png differ diff --git a/static/images/favicon.png b/static/images/favicon.png new file mode 100644 index 0000000..3267b2c Binary files /dev/null and b/static/images/favicon.png differ diff --git a/static/images/landing.1.png b/static/images/landing.1.png new file mode 100644 index 0000000..b5905a3 Binary files /dev/null and b/static/images/landing.1.png differ diff --git a/static/images/landing.2-original.png b/static/images/landing.2-original.png new file mode 100644 index 0000000..be443dc Binary files /dev/null and b/static/images/landing.2-original.png differ diff --git a/static/images/landing.2.png b/static/images/landing.2.png new file mode 100644 index 0000000..36475ed Binary files /dev/null and b/static/images/landing.2.png differ diff --git a/static/images/landing.4-original.png b/static/images/landing.4-original.png new file mode 100644 
index 0000000..ed0d75b Binary files /dev/null and b/static/images/landing.4-original.png differ diff --git a/static/images/landing.4.png b/static/images/landing.4.png new file mode 100644 index 0000000..75a8334 Binary files /dev/null and b/static/images/landing.4.png differ diff --git a/static/images/landing.5.png b/static/images/landing.5.png new file mode 100644 index 0000000..914338a Binary files /dev/null and b/static/images/landing.5.png differ diff --git a/static/images/logo.Argentys.png b/static/images/logo.Argentys.png new file mode 100644 index 0000000..68e0194 Binary files /dev/null and b/static/images/logo.Argentys.png differ diff --git a/static/images/logo.Embleema.png b/static/images/logo.Embleema.png new file mode 100644 index 0000000..dc8f399 Binary files /dev/null and b/static/images/logo.Embleema.png differ diff --git a/static/images/logo.MilliporeSigma.png b/static/images/logo.MilliporeSigma.png new file mode 100644 index 0000000..7988b5a Binary files /dev/null and b/static/images/logo.MilliporeSigma.png differ diff --git a/static/images/logo.OpenHealthSystemsLaboratory.png b/static/images/logo.OpenHealthSystemsLaboratory.png new file mode 100644 index 0000000..8ffcc7f Binary files /dev/null and b/static/images/logo.OpenHealthSystemsLaboratory.png differ diff --git a/static/images/logo.about.png b/static/images/logo.about.png new file mode 100644 index 0000000..8f948f6 Binary files /dev/null and b/static/images/logo.about.png differ diff --git a/static/images/logo.clinical.png b/static/images/logo.clinical.png new file mode 100755 index 0000000..11a785d Binary files /dev/null and b/static/images/logo.clinical.png differ diff --git a/static/images/logo.helix_only.png b/static/images/logo.helix_only.png new file mode 100644 index 0000000..914338a Binary files /dev/null and b/static/images/logo.helix_only.png differ diff --git a/static/images/logo.regulatory.png b/static/images/logo.regulatory.png new file mode 100755 index 0000000..b93f83e Binary 
files /dev/null and b/static/images/logo.regulatory.png differ diff --git a/static/images/logo.research.png b/static/images/logo.research.png new file mode 100755 index 0000000..b040ce6 Binary files /dev/null and b/static/images/logo.research.png differ diff --git a/static/images/logo.workshop.png b/static/images/logo.workshop.png new file mode 100644 index 0000000..0eaa690 Binary files /dev/null and b/static/images/logo.workshop.png differ diff --git a/static/images/organization.2.png b/static/images/organization.2.png new file mode 100644 index 0000000..7d8f8cb Binary files /dev/null and b/static/images/organization.2.png differ diff --git a/static/images/powered-by-aws.png b/static/images/powered-by-aws.png new file mode 100644 index 0000000..0e299a8 Binary files /dev/null and b/static/images/powered-by-aws.png differ diff --git a/static/images/ppp.png b/static/images/ppp.png new file mode 100644 index 0000000..d545c15 Binary files /dev/null and b/static/images/ppp.png differ diff --git a/static/images/services.1.png b/static/images/services.1.png new file mode 100644 index 0000000..f6b94db Binary files /dev/null and b/static/images/services.1.png differ diff --git a/static/images/services.2.png b/static/images/services.2.png new file mode 100644 index 0000000..336961a Binary files /dev/null and b/static/images/services.2.png differ diff --git a/static/images/services.3.png b/static/images/services.3.png new file mode 100644 index 0000000..020c0ba Binary files /dev/null and b/static/images/services.3.png differ diff --git a/static/images/services.4.png b/static/images/services.4.png new file mode 100644 index 0000000..10b1007 Binary files /dev/null and b/static/images/services.4.png differ diff --git a/themes/hugo-biocompute/README.md b/themes/hugo-biocompute/README.md new file mode 100644 index 0000000..dbd79b5 --- /dev/null +++ b/themes/hugo-biocompute/README.md @@ -0,0 +1,3 @@ +# hugo-biocompute + +A Bootstrap-based, opinionated theme for Hugo. 
It is based on the original design for [biocomputeobject.org](https://www.biocomputeobject.org/). diff --git a/themes/hugo-biocompute/archetypes/default.md b/themes/hugo-biocompute/archetypes/default.md new file mode 100644 index 0000000..c95fab7 --- /dev/null +++ b/themes/hugo-biocompute/archetypes/default.md @@ -0,0 +1,10 @@ +--- +title: '{{ replace .TranslationBaseName "-" " " | title }}' +date: {{ .Date }} +draft: true +meta_img: "images/image.png" +tags: + - "one tag" + - "another tag" +description: "Description for the page." +--- diff --git a/themes/hugo-biocompute/layouts/404.html b/themes/hugo-biocompute/layouts/404.html new file mode 100644 index 0000000..cf627bb --- /dev/null +++ b/themes/hugo-biocompute/layouts/404.html @@ -0,0 +1,9 @@ +{{ define "main" }} +
+
+

404

+

page not found

+

back to the homepage

+
+
+{{ end }} \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/_default/baseof.html b/themes/hugo-biocompute/layouts/_default/baseof.html new file mode 100644 index 0000000..51ce2e1 --- /dev/null +++ b/themes/hugo-biocompute/layouts/_default/baseof.html @@ -0,0 +1,101 @@ + + + + + + + {{ block "title" . }}{{ if .IsHome }} {{ .Site.Title }} {{ else }} {{ .Title }} - {{ .Site.Title }} + {{ end }}{{ end }} + + + + + + + + + + + + + + {{ if .Site.Params.twitter }} + + + {{ end }} + + + + + + + + + + + + {{ if .Site.Params.highlightjs }} + + {{ end }} + + {{ if .Site.Params.progressively }} + + {{ end }} + + + + {{ if .Site.RSSLink }} + + {{ end }} + + + + {{ block "main" . }}{{ end }} + + {{/* + + + */}} + + {{ if .Site.Params.highlightjs }} + + {{ range .Site.Params.highlightjslanguages }} + + {{ end }} + + {{ end }} + + {{ if .Site.Params.progressively }} + + + {{ end }} + + {{ if .Site.Params.google_tag_manager }} + {{ partial "gtm.html" . }} + {{ end }} + + {{ if .Site.Params.uselatex }} + {{ partial "footer_mathjax.html" . }} + {{ end }} + + + \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/_default/list.html b/themes/hugo-biocompute/layouts/_default/list.html new file mode 100644 index 0000000..009176b --- /dev/null +++ b/themes/hugo-biocompute/layouts/_default/list.html @@ -0,0 +1,23 @@ +{{ define "main" }} +{{ partial "header" . }} + +
+
+
+
+
+

Tags

+
+ +
+
+
+
+
+ +{{ partial "footer" . }} +{{ end }} diff --git a/themes/hugo-biocompute/layouts/_default/section.html b/themes/hugo-biocompute/layouts/_default/section.html new file mode 100644 index 0000000..a046e9c --- /dev/null +++ b/themes/hugo-biocompute/layouts/_default/section.html @@ -0,0 +1,28 @@ +{{ define "main" }} + {{ partial "header" . }} + +
+
+
+
+
+ {{ partial "content" . }} +
+ +
+

Sections

+
    + {{ range sort .Pages "Title" "asc" }} +
  • + {{ .Title }} +
  • + {{ end }} +
+
+
+
+
+
+ + {{ partial "footer" . }} +{{ end }} diff --git a/themes/hugo-biocompute/layouts/_default/single.html b/themes/hugo-biocompute/layouts/_default/single.html new file mode 100644 index 0000000..d4ac3bd --- /dev/null +++ b/themes/hugo-biocompute/layouts/_default/single.html @@ -0,0 +1,17 @@ +{{ define "main" }} + {{ partial "header" . }} + +
+
+
+
+
+ {{ partial "content" . }} +
+
+
+
+
+ + {{ partial "footer" . }} +{{ end }} diff --git a/themes/hugo-biocompute/layouts/_default/taxonomy.html b/themes/hugo-biocompute/layouts/_default/taxonomy.html new file mode 100644 index 0000000..aa1e4f9 --- /dev/null +++ b/themes/hugo-biocompute/layouts/_default/taxonomy.html @@ -0,0 +1,23 @@ +{{ define "main" }} +{{ partial "header" . }} + +
+
+
+
+
+

Posts about {{ .Title }}

+
+
    + {{ range .Data.Pages.ByPublishDate }} + {{ partial "list" . }} + {{ end }} +
+
+
+
+
+
+ +{{ partial "footer" . }} +{{ end }} diff --git a/themes/hugo-biocompute/layouts/index.html b/themes/hugo-biocompute/layouts/index.html new file mode 100644 index 0000000..1430419 --- /dev/null +++ b/themes/hugo-biocompute/layouts/index.html @@ -0,0 +1,19 @@ +{{ define "main" }} +{{ partial "header" . }} + +
+
+
+
+
+
+ {{ partial "content" . }} +
+
+
+
+
+
+ +{{ partial "footer" . }} +{{ end }} \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/partials/content.html b/themes/hugo-biocompute/layouts/partials/content.html new file mode 100644 index 0000000..b4af9e9 --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/content.html @@ -0,0 +1,6 @@ +{{ if .Site.Params.progressively }} + {{ $newImage := (print "
$3
") }} + {{ .Content | replaceRE "(.*)" $newImage | safeHTML }} +{{ else }} + {{ .Content }} +{{ end }} diff --git a/themes/hugo-biocompute/layouts/partials/disqus.html b/themes/hugo-biocompute/layouts/partials/disqus.html new file mode 100644 index 0000000..49262d5 --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/disqus.html @@ -0,0 +1,32 @@ +
+ +
+
+
+
+
+
+
+
+
+
+
+ + + +
\ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/partials/footer.html b/themes/hugo-biocompute/layouts/partials/footer.html new file mode 100644 index 0000000..e72bdcc --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/footer.html @@ -0,0 +1,57 @@ + \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/partials/footer_mathjax.html b/themes/hugo-biocompute/layouts/partials/footer_mathjax.html new file mode 100644 index 0000000..191286e --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/footer_mathjax.html @@ -0,0 +1,4 @@ +{{ if or (in (string .Content) "\\") (in (string .Content) "$") }} + + +{{ end }} diff --git a/themes/hugo-biocompute/layouts/partials/gtm.html b/themes/hugo-biocompute/layouts/partials/gtm.html new file mode 100644 index 0000000..db0641e --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/gtm.html @@ -0,0 +1,7 @@ + + \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/partials/header.html b/themes/hugo-biocompute/layouts/partials/header.html new file mode 100644 index 0000000..2497b46 --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/header.html @@ -0,0 +1,33 @@ +
+
+
+
+ + +
+
+
+
diff --git a/themes/hugo-biocompute/layouts/partials/list.html b/themes/hugo-biocompute/layouts/partials/list.html new file mode 100644 index 0000000..99da389 --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/list.html @@ -0,0 +1,8 @@ +
  • +
    + {{ .Date.Format "2006-01-02" }} +
    + +
  • \ No newline at end of file diff --git a/themes/hugo-biocompute/layouts/partials/utterances.html b/themes/hugo-biocompute/layouts/partials/utterances.html new file mode 100644 index 0000000..fb491b5 --- /dev/null +++ b/themes/hugo-biocompute/layouts/partials/utterances.html @@ -0,0 +1,15 @@ +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    \ No newline at end of file diff --git a/themes/hugo-biocompute/static/css/custom.css b/themes/hugo-biocompute/static/css/custom.css new file mode 100644 index 0000000..bfd6874 --- /dev/null +++ b/themes/hugo-biocompute/static/css/custom.css @@ -0,0 +1,352 @@ +/* Global styles */ + +body { + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-feature-settings: "liga", "kern"; +} + +/* Fix jumping scrollbar when switching to long pages */ + +html { + margin-left: calc(100vw - 100%); + margin-right: 0; +} + +/* link */ + +a { + color: #0071bc; + text-decoration: none; + background-color: transparent; +} + +a:hover, a:focus, a:active { + color: #8cc53f; + text-decoration: none; +} + +/* landing content */ + +.landing { + font-size: 1.125rem; + line-height: 1.875rem; +} + +.landing p { + margin-bottom: 1.5rem; +} + +/* horizontal rule */ + +hr { + border: 0; + border-top: 1px solid #e1e9f4; +} + +.footer hr { + margin: 1.3rem 1.52rem 1.3rem 1.52rem; +} + +/* Partial: Colors */ + +.text-patreon { + color: #F96854 !important; +} + +.text-ko-fi { + color: #DF5000 !important; +} + +.text-bmc { + color: #FF813F !important; +} + +/* Partial: Header */ + +.header a:hover { + text-decoration: none; +} + +.header ul.nav li { + font-size: 1rem; +} + +.header ul.nav li a.text-events { + color: #8cc53f; +} + +.header ul.nav li a.text-events:hover { + color: #0071bc; +} + +/* Partial: Content */ + +.content .page-heading { + font-size: 1.5rem; + font-weight: 700; + letter-spacing: -0.005rem; + text-transform: capitalize; /* bare keyword: a quoted string is an invalid value and the declaration would be dropped */ + -moz-osx-font-smoothing: grayscale; + -webkit-font-smoothing: antialiased; + color: #333333; + margin-bottom: 16px; +} + +.content h1, h2, h3, h4, h5, h6 { /* NOTE(review): only h1 is scoped to .content; h2-h6 match site-wide - confirm this is intended */ + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe 
UI Symbol"; + font-weight: 700; + color: #333333; + margin-bottom: 1rem; +} + +.content .meta { + font-weight: 400; + font-size: 1.125rem; + color: #738491; + margin-bottom: 10px; +} + +.content .meta a { + text-decoration: none; +} + +.content .middot:before { + margin: 0 3px; + content: "·"; +} + +.content .caption { + text-align: center; + margin-top: 10px; + color: #738491; + font-size: 0.9rem; +} + +.content .markdown { + font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-weight: 400; + font-size: 1.125rem; + color: #333333; + line-height: 1.75rem; +} + +.content .markdown li { + margin-top: 1rem; + margin-bottom: 1rem; +} + +.content twitterwidget { + margin: auto; +} + +.content .meta, .content .markdown h1, .content .markdown h2, .content .markdown h3, .content .markdown h4, .content .markdown h5, .content .markdown h6, .content .markdown p, .content .markdown ul, .content .markdown ol, .content .markdown dl, .content .markdown blockquote, .gist-file { + margin-left: 1.5rem; + margin-right: 1.5rem; +} + +div.alert { + margin-left: 1.5rem; + margin-right: 1.5rem; +} + +div.alert p { + margin-bottom: 0; +} + +/* blog post typography */ + +.content .blog-post-title { + font-size: 2.5rem; + line-height: 3rem; +} + +.content .blog-post-content { + font-size: 1.3125rem; + line-height: 2rem; +} + +/* centered, boxed blog post images */ + +.content .figure img { + display: block; + margin-left: auto; + margin-right: auto; + padding-left: 1.5rem; + padding-right: 1.5rem; + max-width: 100%; +} + +.content .markdown code, .content .markdown pre { + font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + background-color: #fff; +} + +.content .markdown code { + /* enclosed by single backtick (`) */ + color: #333; + padding: .2em .4em; + margin: 0; + font-size: 1rem; + background-color: 
rgba(27, 31, 35, .05); + border-radius: 6px; +} + +.content .markdown pre { + /* Hugo specific: consider using the 'highlight' shortcode */ + display: block; + margin-top: 1rem; + margin-bottom: 2rem; + padding: 1rem; + line-height: 20px; + white-space: pre; + word-break: break-all; + word-wrap: break-word; + margin-left: 1.5rem; + margin-right: 1.5rem; + border: 1px solid #ddd; + border-radius: 6px; +} + +.content .markdown pre code, .content .markdown pre output { + /* enclosed by 4 backticks (````) */ + padding: 0; + font-size: 0.75rem; + line-height: 20px; + background-color: #fff; + border-radius: 0; +} + +.content .markdown blockquote { + padding: 0.5rem 0.5rem; + margin-top: 1rem; + margin-bottom: 1rem; + color: #7a7a7a; + border-left: 0.25rem solid #595959; +} + +.content .markdown blockquote p:last-child { + margin-bottom: 0; +} + +.content .markdown figure { + background: #fff; +} + +.content .groupby { + list-style: none; + color: #333333; + font-size: 2rem; + font-weight: 700; + margin-top: 10px; + margin-bottom: 8px; +} + +.content .post-item { + display: -webkit-flex; + display: -moz-flex; + display: -ms-flexbox; + display: -ms-flex; + display: flex; + margin-bottom: 1rem; +} + +.content .post-item .meta-date { + font-size: 1.125rem; + color: #738491; + line-height: 2rem; + display: block; + min-width: 10rem; +} + +.content .post-item .meta-title { + font-size: 1.125rem; + line-height: 2rem; + margin-left: -3rem; +} + +.content .navigation .icon { + width: 16px; + height: 16px; +} + +/* Partial: Utterances Comments */ + +.comments { + padding-left: 1.5rem; + padding-right: 1.5rem; + max-width: 100%; +} + +/* Partial: Footer */ + +.footer { + font-weight: 400; + font-size: 1.125rem; +} + +.footer a { + color: #333333; +} + +.footer a:hover, .footer a:focus { + color: #0071bc; +} + +.footer a { + margin-left: 6px; + margin-right: 6px; +} + +.footer a:first-child { + margin-left: 0; +} + +.footer a:last-child { + margin-right: 0; +} + +.site-copyright { 
+ min-width: 16em; +} + +.site-social { + text-align: right; +} + +.site-social ul { + list-style-type: none; + padding: 0; +} + +.site-social li { + display: inline-block; +} + +.site-social li:not(:last-child)::after { + content: '|'; + color: #e1e9f4; + margin: 0 0.3rem 0 0.5rem; +} + +/* < medium screens */ + +@media (max-width: 767.98px) { + html { + font-size: 0.9rem; + } + .content .meta, .content .markdown h1, .content .markdown h2, .content .markdown h3, .content .markdown h4, .content .markdown h5, .content .markdown h6, .content .markdown p, .content .markdown ul, .content .markdown ol, .content .markdown dl, .content .markdown blockquote { + margin-left: 0; + margin-right: 0; + } +} + +/* < large screens */ + +@media (max-width: 991.98px) { + .footer hr { + margin: 1.3rem 0rem 1.3rem 0rem; + } + .site-social { + text-align: left; + } +} \ No newline at end of file diff --git a/themes/hugo-biocompute/static/js/math-code.js b/themes/hugo-biocompute/static/js/math-code.js new file mode 100644 index 0000000..a435806 --- /dev/null +++ b/themes/hugo-biocompute/static/js/math-code.js @@ -0,0 +1,20 @@ +(function() { // Unwraps code elements (not directly inside pre, with no child elements) whose whole text is delimited as TeX math + var i, text, code, codes = document.getElementsByTagName('code'); + for (i = 0; i < codes.length;) { + code = codes[i]; + if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) { + text = code.textContent; + if (/^\$[^$]/.test(text) && /[^$]\$$/.test(text)) { + text = text.replace(/^\$/, '\\(').replace(/\$$/, '\\)'); + code.textContent = text; + } + if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) || + /^\$(.|\s)+\$$/.test(text) || + /^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) { + code.outerHTML = code.innerHTML; // replace the code element with its own contents; i is not advanced on this path + continue; + } + } + i++; + } +})(); diff --git a/themes/hugo-biocompute/theme.toml b/themes/hugo-biocompute/theme.toml new file mode 100644 index 0000000..6cff1c4 --- /dev/null +++ b/themes/hugo-biocompute/theme.toml @@ -0,0 +1,10 @@ +name = "biocompute" +description = "A 
Bootstrap-based theme for Hugo." +homepage = "https://github.com/biocompute-objects/BCO_Specification/" +tags = ["bootstrap", "responsive"] +min_version = 0.17 + +[author] + name = "HadleyKing" + homepage = "https://github.com/HadleyKing" + repo = "https://github.com/biocompute-objects/BCO_Specification/"