diff --git a/src/bento/aboutPagesRoutes.js b/src/bento/aboutPagesRoutes.js
index 8eb9894f..1f20f924 100644
--- a/src/bento/aboutPagesRoutes.js
+++ b/src/bento/aboutPagesRoutes.js
@@ -1,8 +1,7 @@
export default [
- '/purpose',
- '/crdc',
- '/ctdc-data-model',
'/developers',
+ '/purpose',
'/support',
- '/request-access',
+ '/submit',
+ '/cloud-computing'
];
diff --git a/src/components/InteractiveHero/InteractiveHero.js b/src/components/InteractiveHero/InteractiveHero.js
index 4948fd9b..b91d8f8d 100644
--- a/src/components/InteractiveHero/InteractiveHero.js
+++ b/src/components/InteractiveHero/InteractiveHero.js
@@ -105,7 +105,7 @@ const TherapiesActiveText = ({ classes, heroData }) => (
{heroData ? heroData.numberOfTherapies : 'NA'}
{' '}
- ThERAPIES
+ THERAPIES
{' '}
diff --git a/src/components/Layout/LayoutView.js b/src/components/Layout/LayoutView.js
index 280e7802..99574aeb 100644
--- a/src/components/Layout/LayoutView.js
+++ b/src/components/Layout/LayoutView.js
@@ -78,7 +78,7 @@ const Layout = ({ classes, isSidebarOpened }) => {
),
)}
diff --git a/src/config/globalHeaderData.js b/src/config/globalHeaderData.js
index d38070f2..cdd6d789 100644
--- a/src/config/globalHeaderData.js
+++ b/src/config/globalHeaderData.js
@@ -109,6 +109,12 @@ export const navbarSublists = {
id: 'navbar-dropdown-item-data-harmonization',
className: 'navMobileSubItem',
},
+ {
+ name: 'Cloud computing',
+ link: '/cloud-computing',
+ id: 'navbar-dropdown-item-cloud-computing',
+ className: 'navMobileSubItem',
+ },
],
"Resources": [
{
diff --git a/src/content/prod/aboutPagesContent.yaml b/src/content/prod/aboutPagesContent.yaml
index f51a060f..b563112f 100644
--- a/src/content/prod/aboutPagesContent.yaml
+++ b/src/content/prod/aboutPagesContent.yaml
@@ -1,986 +1,46 @@
-- page: '/crdc'
- title: "Cancer Research Data Commons (CRDC) and Analysis"
+- page: '/submit'
+ title: "Data Submission"
primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CRDC.png
content:
- - paragraph: "$$#CRDC:#$$"
- - paragraph: "$$[The Cancer Research DataCommons](https://datascience.cancer.gov/data-commons)$$ is an initiative from NCI’s Center for Biomedical
- Informatics & Information Technology ($$[CBIIT](https://datascience.cancer.gov/)$$). CBIIT’s vision for the project consists of a
- virtual, expandable infrastructure that provides secure access to many different data types across scientific domains,
- allowing users to analyze, share, and store results, leveraging the storage and elastic compute, or ability to easily scale resources, of the
- cloud. The ability to combine diverse data types and perform cross-domain analysis of large data sets can lead to new discoveries in cancer prevention,
- treatment and diagnosis, and supports the goals of precision medicine and the Cancer Moonshot℠."
- - paragraph: "Users can query the CTDC data via Graphical User Interface (GUI) or APIs. The GUI provides users a distilled set of parameters (faceted querying) they can use to explore the CTDC data. The APIs provide access to the full collection of parameters as seen in the $$[CTDC Data Model](https://github.com/CBIIT/ctdc-model)$$."
- - paragraph: "$$#ANALYSIS- THE CLOUD RESOURCES:#$$"
- - paragraph: "The CRDC has three Cloud Resources ($$[Seven Bridges Cancer Genomics Cloud](http://www.cancergenomicscloud.org/)$$,$$[ISB Cancer Genomics Cloud ](https://isb-cgc.appspot.com/)$$, $$[Terra ](https://terra.bio/)$$),
- each providing analysis platforms for the community to use when working with CRDC data."
- - paragraph: "These cloud-based platforms eliminate the need for researchers to download and store extremely large data sets by allowing them to bring
- analysis tools to the data in the cloud, instead of the traditional process of bringing the data to the tools on local hardware. The Cloud Resources
- also provide access to on-demand computational capacity to analyze these data. The Cloud Resources allow users to run best practice tools and pipelines
- already implemented or upload their own data or analysis methods to workspaces."
- - paragraph: "All three Cloud Resources provide support for data access through a web-based user interface in addition to programmatic access to analytic
- tools and workflows, and the capability of sharing results with collaborators. Each Cloud Resource is continually developing new functionality to improve
- the user experience and add new tools for researchers."
- - paragraph: "$$*SEVEN BRIDGES CANCER GENOMICS CLOUD (SBG)*$$ is hosted on Amazon Web Services and has a rich user interface that allows researchers to find data of
- interest and combine it with their own private data. Data can be analyzed using more than 200 preinstalled, curated bioinformatics tools and workflows.
- Researchers can also extend the functionality of the platform by adding their own data and tools via an intuitive software development kit."
- - paragraph: "$$*INSTITUTE FOR SYSTEMS BIOLOGY CANCER GENOMICS CLOUD (ISB-CGC)*$$ leveraging many aspects of the Google Cloud Platform, allows scientists to
- interactively define and compare cohorts, examine underlying molecular data for specific genes and pathways, and share insights with collaborators.
- For computational users, Application Program Interfaces (APIs) and Google Cloud Platform (GCP) resources such as BigQuery and Google Pipeline service,
- allow complex queries from R or Python scripts, or Dockerized workflows to run on data available in the Google Cloud Storage."
- - paragraph: "$$*BROAD INSTITUTE- TERRA*$$ is an open, standards-based platform for performing production-scale data analysis in the cloud. Built on the Google
- Cloud Platform, Terra empowers analysts, tool developers, and production managers to run large-scale analysis and to share results with collaborators.
- Users can upload their own analysis methods and data to workspaces or run the Broad’s best practice tools and pipelines."
- - paragraph: "$$*FROM CTDC TO CLOUD RESOURCES:*$$"
- - image: "MyCases-Wizard-Step4-SVG"
- - paragraph: "Researchers find cases/cohorts in CTDC and then identify the files they would like to use for analysis. This list of files is called a Manifest.
- The user will download the Manifest and then upload the Manifest into SBG where the files will be available for analysis. The user will need a SBG account.
- The Manifest file is a text file consisting of CRDC Identifiers and, on uploading to SBG, the user will be able to access the relevant data files and see
- some basic case information."
- - paragraph: "This Cloud Resource analysis model eliminates the need for researchers to download and store extremely large data sets by allowing them to bring
- analysis tools to the data in the cloud, instead of the traditional process of bringing the data to the tools on local hardware. The Cloud Resources also
- provide access to on-demand computational capacity to analyze these data, allow users to run best practice tools and pipelines already implemented, and
- upload their own data or analysis methods to workspaces."
- - paragraph: "All three Cloud Resources provide support for data access through a web-based user interface in addition to programmatic access to analytic tools
- and workflows, and the capability of sharing results with collaborators. Each Cloud Resource is continually developing new functionality to improve the user
- experience and add new tools for researchers."
- - paragraph: "Currently the CTDC supports analysis via the SBG Cloud Resource."
-- page: "/developers"
- title: "Developers"
+ - paragraph: "CTDC is not accepting external data submissions at this time. For more information on how to submit data to other data repositories within the Cancer Research Data Commons, please see $$[here](https://datascience.cancer.gov/data-commons)$$."
+- page: '/developers'
+ title: "For Developers"
primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Developers.png
content:
- - paragraph: "The CTDC System features a GraphQL API to enable querying of the entire data model."
- - paragraph: "https://ctdc-dev.bento-tools.org/graphql/"
- - paragraph: "The CTDC metadata is open to the public and will not require authentication to the system. However, some clinical data and all Data Files are not public so require permission."
- - paragraph: "Users can query the CTDC data via Graphical User Interface (GUI) or API. The GUI provides users a distilled set of parameters (faceted querying) they can use to explore a subset of the CTDC data model. The API provide access to the full collection of parameters as seen in the $$[CTDC Data Model](https://ctdc-dev.bento-tools.org/#/ctdc-data-model)$$."
- - paragraph: "The API is provided “as is”; there are no warranties or conditions arising out of usage of these services. "
- - paragraph: "$$#GITHUB:#$$"
- - paragraph: "As a resource for the public and cancer research, $$[the CTDC GitHub repo](https://github.com/CBIIT/bento-ctdc-frontend)$$ is available for research,
- usage, forking, and pull requests. The codebase is intended for sharing and building frameworks for related initiatives
- and projects. The CTDC GitHub repo has documentation about how to access the system, including endpoints and recommendations for
- tools and example queries. Both the project and documentation are currently maintained and updated. "
- - paragraph: "CTDC is based on a Graph database, and features a GraphQL API (Java) and a React front-end (JavaScript).
- Each tier in the application stack is designed to be modular and adaptable for a variety of use-cases and scenarios. "
-- page: '/support'
- title: "Support"
- primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Support.png
- content:
- - paragraph: "If you have any questions, please contact us at $$[CTDCHelpDesk@mail.nih.gov](CTDCHelpDesk@mail.nih.gov)$$"
-- page: '/request-access'
- title: "Request Access"
- primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_RequestAccess.png
- content:
- - paragraph: "CTDC provides open access to baseline information on the cases present in CTDC. However, the detailed clinical information and associated sequencing data files for cases presented in the CTDC are not open access, they require users to have permission. Requesting permission is a two step process;"
- - paragraph: "STEP 1: Request access through the NCTN Data Archive site."
- - paragraph: "STEP 2: Request access through dbGaP."
- - paragraph: "Once access has been granted, users will be able to view and work with the CTDC data files and clinical information in one of the Cancer Research Data Common’s analysis resources. Currently the CTDC supports analysis via the SBG Cloud Resource ($$[Seven Bridges Cancer Genomics Cloud](http://www.cancergenomicscloud.org/)$$)."
- - paragraph: "$$#STEP 1:#$$"
- - paragraph: "Navigate to the $$[NCTN/NCORP Data Archive](https://nctn-data-archive.nci.nih.gov)$$ site. If you do not have an NCTN account, please create one. Once registered you will be able to request access to the trial of interest. It is suggested to use either the NCT Trial Number or the PubMed ID to search for the trial of interest- each trial in CTDC has an associated NCT Trial number and PubMed ID. A $$[PDF document](https://nctn-data-archive.nci.nih.gov/sites/default/files/RequestPDF.pdf)$$ is available detailing the steps for both registration and access requesting."
- - paragraph: "$$#STEP 2:#$$"
- - paragraph: "Navigate to $$[dbGaP’s Authorized Access](https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login)$$ site and login. If you do not have a dbGaP account, please create one.
- If a user is requesting access through a collaborator or their Principal Investigator (PI) who already has access, then the PI/collaborator can add the user as a “Downloader”.
- If a user is requesting access as a PI, then they will need to create a new research project within dbGaP. The following information is useful to add the appropriate dataset to the dbGaP research project request; ARM Q Study Accession ID is phs001926, ARM Z1D Study Accession ID is phs001859."
+ - paragraph: "Users can query the CTDC data via Graphical User Interface (GUI) or Application Programming Interface (API). The CTDC GitHub repo is also available for those interested in accessing our codebase and documentation."
+ - paragraph: "$$#CTDC GUI#$$"
+ - paragraph: "The GUI provides users a distilled set of parameters (faceted querying) they can use to explore a subset of the CTDC data model. "
+ - paragraph: "$$#CTDC API#$$"
+ - paragraph: "CTDC is based on a Graph database, featuring a $$[ GraphQL API](type:internal url:/#/graphql target:_blank)$$ (Java) and a React front-end (JavaScript). Each tier in the application stack is designed to be modular and adaptable for a variety of use-cases and scenarios. A GraphQL API enables querying of the entire data model. The API is provided “as is”; there are no warranties or conditions arising out of usage of these services."
+ - paragraph: "$$#GITHUB#$$"
+ - paragraph: "The $$[ CTDC GitHub repo](https://github.com/CBIIT/crdc-ctdc-ui)$$ is available for research, usage, forking, and pull requests. The codebase is intended for sharing and building frameworks for related initiatives and projects. The CTDC GitHub repo has documentation about how to access the system, including endpoints and recommendations for tools and example queries. Both the project and documentation are maintained and updated in accordance with major and minor releases."
- page: '/purpose'
title: "Purpose"
primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Purpose.png
content:
- - paragraph: "The CTDC is an initiative from NCI’s Center for Biomedical Informatics and Information Technology (CBIIT) to expand access to data from precision medicine cancer trials to accelerate cancer research.
- The CTDC as part of the Cancer Research Data Commons provides access to harmonized data from NCI-sponsored clinical trials, including genomic and clinical data from NCI’s $$[Molecular Analysis for Therapy Choice](https://www.cancer.gov/about-cancer/treatment/clinical-trials/nci-supported/nci-match)$$ (NCI-MATCH).
- CTDC allows users to take advantage of NCI’s $$[Cloud Resources](https://datascience.cancer.gov/data-commons/cloud-resources)$$ for easy visualization and bioinformatic analysis of data, with no programming skills required."
-- page: '/ctdc-data-model'
- title: 'CTDC Data Model'
- primaryContentImage: 'https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CTDC_Model.png'
- secondaryZoomImageTitle: "The CTDC Data Model"
- secondaryZoomImage: 'https://cbiit.github.io/ctdc-model/model-desc/ctdc-model.svg'
+ - paragraph: "The goals of the CTDC are to advance cancer research and accelerate the development of innovative therapies by improving access to data from NCI-sponsored clinical studies, including genomic panel assay and clinical data. The CTDC does this through: "
+ - paragraph: "$$*Graphical User Interface (GUI):*$$ The CTDC’s GUI includes an Explore dashboard with search filters to help users visualize, explore, and navigate complex metadata without the need for coding or specialized technical skills. "
+ - paragraph: "$$*Data consolidation:*$$ The CTDC consolidates data from clinical studies funded by the NCI. This allows researchers to analyze data collectively, leading to deeper insights and a better understanding of cancer’s complexities. "
+ - paragraph: "$$*Data harmonization:*$$ Data harmonization ensures that data across studies within the CTDC are standardized and organized in a consistent manner to improve data compatibility, integration, and meta-analysis. "
+ - paragraph: "$$*Integration with NCI Cloud Resources:*$$ Users can easily transfer selected CTDC data to the $$[ Velsera Seven Bridges Cancer Genomics Cloud (SB-CGC)](https://datacommons.cancer.gov/analytical-resource/seven-bridges-cancer-genomics-cloud)$$, a cloud-based platform for cancer research funded by the NCI. Here, researchers can integrate multi-omic data across sources and leverage access to a multitude of tools and workflows for computation and analysis. "
+ - paragraph: "$$*Fueling collaborative research:*$$ By centralizing data and making them available through NCI’s Cloud Resources, the CTDC promotes secure collaboration among distributed research groups, fostering interdisciplinary partnerships. "
+ - paragraph: "$$*Democratizing data access:*$$ Data in the CTDC are made available under various access levels, including open access (no registration required) and controlled access (registration required). The CTDC aims to make each dataset as open as possible while protecting participant privacy and adhering to regulations, agreements, and other considerations specific to each study. "
+ - paragraph: "$$*Alignment to F.A.I.R data principles:*$$ The CTDC adheres to Findable, Accessible, Interoperable, and Reusable ($$[FAIR](https://www.go-fair.org/fair-principles/)$$) principles for scientific data management and stewardship. CTDC seeks to provide clearly organized data and guidance enabling end users to search for, find, and access data of interest. The emphasis on harmonization described above promotes the interoperability of data within and across the CRDC ecosystem and beyond and promotes reusability of data beyond the primary publication."
+- page: '/support'
+ title: "Support"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Support.png
content:
- - paragraph: "$$#HARMONIZATION/INTEGRATION:#$$"
- - paragraph: "The CTDC functions best for the research community when the data is integrated. Once a project
- is accepted into the CTDC, the CTDC data team work with the submitting team to review the data looking
- at data structure, data values, data quality as well as identifying any standards that were utilized.
- Based on that review, a plan for how to submit the data will be agreed upon between CTDC and the submitter and the plan implemented. "
- - paragraph: "$$#FAIR AND CITING:#$$"
- - paragraph: "The CTDC will adhere to $$[FAIR](https://www.go-fair.org/fair-principles/)$$ principles of data stewardship: Findable, Accessible,
- Interoperable, and Reusable. Please credit the CTDC in your manuscript. When citing individual projects,
- please refer to the attribution policies of the project when available. "
- - paragraph: "$$#LICENSE#$$"
- - paragraph: "Data made available through the CTDC is for research purposes only. The CTDC provides researchers with
- access to data from canine cancer studies to enable exploratory analysis that cannot be considered definitive for
- outcomes. All data is publicly available. "
- - paragraph: "$$#DATA MODEL & DICTIONARY:#$$"
- - paragraph: "The CTDC data model is a representation of how all the constituent data are arranged relative to each
- other. Given the number of studies, the range of study types and the multiple data types that the CTDC needs
- to support, the data model will need to adapt to the needs of the science. The data model is not static and
- is expected to change as new needs are identified. The data dictionary can be found $$[here](https://ctdc-dev.bento-tools.org/#/data-dictionary)$$."
- - paragraph: "The SVG graphic below represents the current CTDC data model consisting of data nodes,
- node properties, and relationships (edges). It provides a comprehensive mapping of the system data,
- part of which may be viewed in the application interface and UI. In other words, additional nodes and
- properties are available for inspection and querying beyond those presented on the front-end. "
- - paragraph: "Additionally, the CTDC Data Model serves as a template for similar initiatives and data structures,
- including graph-based database schemas. The model will continue to evolve as data needs are further discerned. "
- - paragraph: "The tool used to generate this visual may be sourced on Github at: https://github.com/CBIIT/ctdc-model"
- - paragraph: "The entire CTDC data model can be queried via API: https://ctdc-dev.bento-tools.org/graphql/"
-- page: '/data-dictionary'
- title: 'CTDC Data Dictionary '
+ - paragraph: "If you have any questions, please contact us at $$[CTDCHelpDesk@mail.nih.gov](CTDCHelpDesk@mail.nih.gov)$$"
+- page: '/cloud-computing'
+ title: "Cloud computing"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CRDC.png
content:
- title: "CTDC Nodes, Conventions, and Data Dictionary"
- introduction: "Clinical trials in precision medicine generate large volumes of complex biomedical data types- patient metadata, molecular variant profiles and treatment outcomes. To address this complexity CTDC has used a graph structure to model a clinical trial workflow. Key trial entities have been modelled as node types, while the association between nodes have been explicitly modelled as relationship types. There are 18 node types and 20 relationship types in the CTDC data model, these are listed in the section CTDC Node Types and Table 1, respectively. Both nodes and relationships store data attributes in the form of properties. These data attributes are listed in the section CTDC Attribute Types. The CTDC graph data model has been implemented in Neo4J, a commercial graph database platform."
- nodeTypes:
- - nodeTypeTitle: "clinical_trial"
- nodeTypeDescription: "An interventional clinical research study that is represented within the CTDC, in terms of its design, data and key results."
- nodeTypeNUmber: "1"
- - nodeTypeTitle: "arm"
- nodeTypeDescription: "A treatment arm tests a single therapeutic agent against a set of genomic aberrations. Each arm may accrue at most 35 patients; for arms studying more common genomic aberrations up to 70 patients may be enrolled. Each treatment arm has a set of inclusion and exclusion criteria."
- nodeTypeNUmber: "2"
- - nodeTypeTitle: "case"
- nodeTypeDescription: "An individual who has consented to participate in and has registered in the NCI-MATCH clinical trial. A case may be assigned to zero or more arms."
- nodeTypeNUmber: "3"
- - nodeTypeTitle: "metastatic_site"
- nodeTypeDescription: "An anatomic site, that is not the primary site of disease, in trial participant, where cancerous cells have been detected. A case may have zero or more metastatic_sites."
- nodeTypeNUmber: "4"
- - nodeTypeTitle: "specimen"
- nodeTypeDescription: "A material sample extracted from a case. One or more specimens may be associated with a case. Specimens may be of type tumor or normal."
- nodeTypeNUmber: "5"
- - nodeTypeTitle: "ihc_assay_report"
- nodeTypeDescription: "A report generated from immunohistochemistry (IHC) tests performed on a specimen. In general, the expression of four genes: PTEN, MLH1, MSH2 and RB, is assayed by a single IHC assay run. Only one ihc_assay_report, per gene, is associated with a specimen."
- nodeTypeNUmber: "6"
- - nodeTypeTitle: "nucleic_acid"
- nodeTypeDescription: "An aliquot of nucleic acid prepared from a specimen."
- nodeTypeNUmber: "7"
- - nodeTypeTitle: "sequencing_assay"
- nodeTypeDescription: "A sequencing test performed on the nucleic_acid aliquot to identify genomic abnormalities present in the specimen. A case may have multiple sequencing assays."
- nodeTypeNUmber: "8"
- - nodeTypeTitle: "file"
- nodeTypeDescription: "The output(s) of a sequencing test and associated bioinformatics pipelines. These may include raw sequence files, aligned read files and files of called variants."
- nodeTypeNUmber: "9"
- - nodeTypeTitle: "variant_report"
- nodeTypeDescription: "The report generated from processing the output of the sequencing_assay, listing the genomic abnormalities identified in the specimen. Only one variant_report is associated with a sequencing_assay. A case may have multiple variant_reports."
- nodeTypeDescriptionParagraphTwo: "The variant_report lists genomic variants of the represented by the following CTDC node types:"
- nodeTypeNUmber: "10"
- nodeTypeSubList:
- - nodeTypeSubListTitle: "snv_variant"
- nodeTypeSubListDescription: " sequence change where, compared to a reference sequence, one nucleotide is replaced by one nucleotide. This variant type corresponds to the substitution variant type in HGVS nomenclature. A variant_report may have zero or more snv_variants."
- - nodeTypeSubListTitle: "delins_variant"
- nodeTypeSubListDescription: "a sequence change where, compared to a reference sequence, one or more nucleotides are replaced by one or more other nucleotides. This variant type corresponds to the deletion-insertion variant type in HGVS nomenclature. A variant_report may have zero or more delins_variants."
- - nodeTypeSubListTitle: "indel_variant"
- nodeTypeSubListDescription: "a sequence change where a set of nucleotides are either inserted into or deleted from a genome sequence. This variant type includes the deletion and insertion variant types in HGVS nomenclature. A variant_report may have zero or more indel_variants."
- - nodeTypeSubListTitle: "copy_number_variant"
- nodeTypeSubListDescription: "a sequence change that results in the whole or partial gain or loss of copies of a gene. The MATCH clinical trial tests for copy number gain only. A variant_report may have zero or more copy_number_variants."
- - nodeTypeSubListTitle: "gene_fusion_variant"
- nodeTypeSubListDescription: "a chromosomal rearrangement that results in a hybrid gene. A variant_report may have zero or more gene_fusion_variants."
- - nodeTypeTitle: "assignment_report"
- nodeTypeDescription: "A report generated by the MATCHBox Rules Engine that summarizes the result of matching a patient’s: (a) disease and prior therapies (b) genomic variants (c) IHC test results, against the inclusion and exclusion criteria of all open treatment arms. The Assignment report lists the result of matching the patient’s data against each treatment arm. A case may have multiple assignment_reports."
- nodeTypeNUmber: "11"
- - nodeTypeTitle: "disease_eligibility_criterion"
- nodeTypeDescription: "A disease diagnosis that determines a case’s participation in an arm of a clinical trial. Disease eligibility criteria may be inclusionary or exclusionary."
- nodeTypeNUmber: "12"
- - nodeTypeTitle: "drug_eligibility_criterion"
- nodeTypeDescription: "A prior or current drug therapy that determines a case’s participation in an arm of a clinical trial. Drug eligibility criteria may be inclusionary or exclusionary."
- nodeTypeNUmber: "13"
- nodeTypeRelationShipTable:
- - head:
- - "Relationship Name"
- - "Source Node"
- - "Destination Node"
- - "Multiplicity"
- - body:
- - row:
- - "of_case"
- - "specimen"
- - "case"
- - 'many_to_one'
- - row:
- - "of_specimen"
- - "ihc_assay_report"
- - "specimen"
- - "many_to_one"
- - row:
- - "of_specimen"
- - "nucleic_acid"
- - "specimen"
- - "many_to_one"
- - row:
- - "of_nucleic_acid"
- - "sequencing_assay"
- - "nucleic_acid"
- - "many_to_one"
- - row:
- - "of_sequencing_assay"
- - "file"
- - "sequencing_assay"
- - "many_to_one"
- - row:
- - "of_sequencing_assay"
- - "variant_report"
- - "sequencing_assay"
- - "many_to_one"
- - row:
- - "snv_variant_of"
- - "snv_variant"
- - "variant_report"
- - "many_to_many"
- - row:
- - "delins_variant_of"
- - "delins_variant"
- - "variant_report"
- - "many_to_many"
- - row:
- - "indel_variant_of"
- - "indel_variant"
- - "variant_report"
- - "many_to_many"
- - row:
- - "copy_number_variant_of"
- - "copy_number_variant"
- - "variant_report"
- - "many_to_many"
- - row:
- - "gene_fusion_variant_of"
- - "gene_fusion_variant"
- - "variant_report"
- - "many_to_many"
- - row:
- - "of_variant_report"
- - "assignment_report"
- - "variant_report"
- - "many_to_one"
- - row:
- - "of_arm"
- - "assignment_report"
- - "arm"
- - "many_to_one"
- - row:
- - "of_trial"
- - "arm"
- - "clinical_trial"
- - "many_to_one"
- - row:
- - "of_arm"
- - "case"
- - "arm"
- - "many_to_many"
- - row:
- - "of_specimen"
- - "assignment_report"
- - "specimen"
- - "many_to_one"
- - row:
- - "met_site_of"
- - "metastatic_site"
- - "case"
- - "many_to_many"
- - row:
- - "of_arm"
- - "file"
- - "arm"
- - "many_to_one"
- - row:
- - "of_arm"
- - "disease_eligibility_criterion"
- - "arm"
- - "many_to_many"
- - row:
- - "of_arm"
- - "drug_eligibility_criterion"
- - "arm"
- - "many_to_many"
- documentConventionsTitle: "Descriptors for Each Attribute"
- documentConventionsDescription: "An interventional clinical research study that is represented within the CTDC, in terms of its design, data and key results. "
- documentConventions:
- - attributeName: "Attribute Name - "
- attributeConventions: "name of the attribute."
- - attributeName: "Definition - "
- attributeConventions: "a concise description of the attribute. Attribute definitions have been developed by the CTDC data team and are specific to the CTDC data model."
- - attributeName: "Attribute of Node/Relationship - "
- attributeConventions: "the parent node or relationship type of attribute."
- - attributeName: "Display Name - "
- attributeConventions: "the display name of attribute on CTDC user interface. Display names are printed for only those attributes that displayed are in the current version of the user interface. All attributes listed below are available via CTDC’s GraphQL API."
- - attributeName: "Required - "
- attributeConventions: "a true/false label that indicates if attribute is a required field for CTDC data submitters."
- - attributeName: "Type - "
- attributeConventions: "the data type of the attribute (string, integer, number, boolean)."
- - attributeName: "Constraints - "
- attributeConventions: "rules enforced on attribute values."
- - attributeName: "Enumeration - "
- attributeConventions: "If applicable, the list of possible values for the attribute"
- attribute:
- - row:
- - "case"
- - "A unique numerical identifier assigned to each case by CTDC."
- - "case"
- - "Case ID"
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "gender"
- - "Gender of case"
- - "case"
- - "Gender"
- - "true"
- - "string"
- - "Of Enumeration"
- - "MALE, FEMALE, UNKNOWN"
- - row:
- - "race"
- - "Race of case"
- - "case"
- - "Race"
- - "true"
- - "string"
- - "Of Enumeration"
- - "AMERICAN_INDIAN_OR_ALASKA_NATIVE, ASIAN, BLACK_OR_AFRICAN_AMERICAN, NATIVE_HAWAIIAN_OR_OTHER_PACIFIC_ISLANDER, NOT_REPORTED, UNKNOWN, WHITE"
- - row:
- - "ethnicity"
- - "Ethnicity of case"
- - "case"
- - "Ethnicity"
- - "true"
- - "string"
- - "Of Enumeration"
- - "HISPANIC_OR_LATINO, NOT_HISPANIC_OR_LATINO, UNKNOWN"
- - row:
- - "patient_status"
- - "Status of case within the MATCH workflow"
- - "case"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "COMPASSIONATE_CARE, OFF_TRIAL, OFF_TRIAL_BIOPSY_EXPIRED, OFF_TRIAL_DECEASED, OFF_TRIAL_NOT_CONSENTED, OFF_TRIAL_NO_TA_AVAILABLE, OFF_TRIAL_REGISTRATION_ERROR, ON_TREATMENT_ARM, PENDING_APPROVAL, PENDING_CONFIRMATION, PTEN_ORDER_REQUESTED, REGISTRATION, REGISTRATION_OUTSIDE_ASSAY"
- - row:
- - "current_step"
- - "The MATCH workflow is divided broadly into steps. Step 0 starts with a case registering and ends with the sign-out of the first assignment report. If case is assigned to a treatment arm s/he moves to step 1. If case progresses during treatment, s/he is assigned to step 2. Current step denotes the step in the workflow the case is currently at."
- - "case"
- - ""
- - "true"
- - "integer"
- - "patient_status >= 0"
- - "None"
- - row:
- - "disease"
- - "Disease condition diagnosed in a case."
- - "case"
- - "Diagnosis"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "ctep_category"
- - "The patient's disease diagnosis described according to the CTEP Simplified Disease Classification."
- - "case"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "ctep_subcategory"
- - "A more granular mapping of the patient's disease diagnosis to the CTEP Simplified Disease "
- - "case"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "meddra_code"
- - "MedDRA code that is assigned to patient's disease diagnosis and that maps to the CTEP subcategory."
- - "case"
- - ""
- - "false"
- - "string"
- - "None"
- - "none"
- - row:
- - "prior_drugs"
- - "A list of drugs prescribed to patient prior to study registration."
- - "case"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "extent_of_disease"
- - "Extent of disease at trial entry."
- - "case"
- - ""
- - "false"
- - "string"
- - "of Enumeration"
- - "Locally advanced, Metastatic, Recurrent"
- - row:
- - "ecog_performance_status"
- - "ECOG Performance Status at study entry (integer values 0 to 5 – only 0 and 1 are eligible for NCI-MATCH). Source is ECOG-ACRIN."
- - "case"
- - ""
- - "false"
- - "integer"
- - "0<= ecog_performance_status <= 5"
- - "None"
- - row:
- - "specimen_id"
- - "A unique identifier assigned to each MATCH specimen by CTDC."
- - "specimen"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "The type of material specimen extracted from a case."
- - "Biopsy type."
- - "specimen"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "Tumor, Normal, Unknown"
- - row:
- - "met_site_id"
- - "A unique identifier assigned to a metastatic site by CTDC."
- - "metastatic_site"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "metastatic_site_name"
- - "Name of anatomic site where metastasis was detected at trial entry"
- - "metastatic_site"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "Bone Marrow, Bone, Lung, Pleura, Liver, Kidney, Stomach, Small intestine, Large intestine, Skin, Orbit, Conjunctiva, Parotid, Submandibular, Testicle, Epidural, CNS-Brain, CNS-Leptom, Pericardium, Lymph Nodes, Adrenals, Other"
- - row:
- - "aliquot_id"
- - "A unique identifier assigned to an aliquot of nucleic acid, prepared from a specimen, by CTDC."
- - "nucleic_acid"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "nucleic_acid_concentration"
- - "Concentration of nucleic acid aliquot extracted from a specimen."
- - "nucleic_acid"
- - ""
- - "true"
- - "number"
- - "nucleic_acid_concentration > 0.0"
- - "None"
- - row:
- - "nucleic_acid_volume"
- - "Volume of nucleic acid aliquot extracted from a specimen."
- - "nucleic_acid"
- - ""
- - "true"
- - "number"
- - "nucleic_acid_volume > 0.0"
- - "None"
- - row:
- - "nucleic_acid_type"
- - "Nucleic acid type of extract."
- - "nucleic_acid"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "DNA, RNA, Pooled DNA/cDNA, Unknown"
- - row:
- - "ihc_assay_id"
- - "A unique identifier assigned to each IHC assay report by CTDC."
- - "ihc_assay_report"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "ihc_test_gene"
- - "Gene symbol of gene being assayed for expression by an immunohistochemistry (IHC) test.
- Attribute of Node: ihc_assay_report"
- - "ihc_assay_report"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "PTEN, RB, MLH1, MSH2"
- - row:
- - "ihc_test_result"
- - "The result of the IHC test performed on gene"
- - "ihc_assay_report"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "EXPRESSED, LOST, INDETERMINATE, UNKNOWN"
- - row:
- - "qc_result"
- - "Sequencing QC result"
- - "sequencing_assay"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "variant_report_id"
- - "A unique identifier assigned to each variant report by CTDC."
- - "variant_report"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "mapd"
- - "Median Absolute Percentage Deviation (MAPD) is a measure of the accuracy of the copy number calls made by the copy number variant pipeline"
- - "variant_report"
- - ""
- - "false"
- - "number"
- - "0.0 < mapd < = 0.5"
- - "None"
- - row:
- - "cellularity"
- - "The fraction of specimen estimated to be comprised of tumor cells."
- - "variant_report"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "file_description"
- - "Optional description of the file and its content."
- - "file"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "file_format"
- - "The specific format of the file as derived by the loader"
- - "file"
- - ""
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "file_location"
- - "S3 bucket location of file in CTDC."
- - "file"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "file_name"
- - "File name as assigned by parent project"
- - "file"
- - "File Name"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "file_size"
- - "Size of the file as derived by the loader"
- - "file"
- - "Size"
- - "true"
- - "number"
- - "None"
- - "None"
- - row:
- - "file_type"
- - "Indicator as to the nature of the file in terms of its content."
- - "file"
- - "File Type"
- - "true"
- - "string"
- - "Of Enumeration"
- - "Raw reads file, Aligned DNA reads file, Aligned RNA reads file, Index file, Variants file"
- - row:
- - "md5sum"
- - "A unique id assigned by CTDC."
- - "file"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "uuid"
- - "A unique id assigned by CTDC."
- - "file"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "variant_id"
- - "A unique identifier assigned to each variant by CTDC."
- - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "external_variant_id"
- - "The COSMIC ID of variant, if available, or another source project assigned ID."
- - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
- - ""
- - "false"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "gene"
- - "Gene symbol of gene that has been identified as having a variant number of copies."
- - "snv_variant, delins_variant, indel_variant, copy_number_variant"
- - ""
- - "true"
- - "string"
- - "valid gene name"
- - "None"
- - row:
- - "chromosome"
- - "The chromosome that encodes variant gene."
- - "snv_variant, delins_variant, indel_variant, copy_number_variant"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22, chrX, chrY"
- - row:
- - "exon"
- - "The exon that encodes the variant nucleotide position."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "integer"
- - "exon > 0"
- - "None"
- - row:
- - "position"
- - "The chromosomal location of nucleotide variant on the positive strand."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "integer"
- - "position > 0"
- - "None"
- - row:
- - "reference"
- - "Reference allele at variant position"
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "string"
- - "string characters in (A, T, G, C, -)"
- - "None"
- - row:
- - "alternative"
- - "Alternative nucelotide identified at variant position."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "string"
- - "string characters in (A, T, G, C, -)"
- - "None"
- - row:
- - "transcript_id"
- - "NCBI Identifier of the transcript isoform chosen for reporting the variant."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "transcript_hgvs"
- - "The transcript level change annotated in the HGVS format."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "genomic_hgvs"
- - "The genomic change annotated in the HGVS format."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "false"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "oncomine_variant_class"
- - "Variant classification label assigned by the variant calling workflow of the Ion Reporter Software."
- - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
- - ""
- - "false"
- - "string"
- - "Of Enumeration"
- - "Amplification, Deletion, Deleterious, Hotspot, Fusion"
- - row:
- - "variant_classification"
- - "Variant classification based on the effect of genomic change."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "amino_acid_change"
- - "The amino acid change resulting from the nucleotide-level change."
- - "snv_variant, delins_variant, indel_variant"
- - ""
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "tumor_suppressor"
- - "A flag that indicates if variant gene is a tumor suppressor."
- - "copy_number_variant"
- - ""
- - "false"
- - "boolean"
- - "Is Unique"
- - "None"
- - row:
- - "gene1"
- - "Gene symbol of upstream partner of a fusion variant."
- - "gene_fusion_variant"
- - ""
- - "true"
- - "string"
- - "valid gene name"
- - "None"
- - row:
- - "gene2"
- - "Gene symbol of downstream partner of a fusion variant."
- - "gene_fusion_variant"
- - ""
- - "true"
- - "string"
- - "valid gene name"
- - "None"
- - row:
- - "assignment_report_id"
- - "A unique identifier assigned to each assignment report by CTDC"
- - "assignment_report"
- - ""
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "step_at_assignment"
- - "The step in the MATCH workflow at which patient has been assigned to treatment arm."
- - "assignment_report"
- - ""
- - "false"
- - "integer"
- - "step_at_assignment > 0"
- - "None"
- - row:
- - "assignment_logic"
- - "The rationale for assigning treatment arm."
- - "assignment_report"
- - ""
- - "false"
- - "string"
- - "None"
- - "None"
- - row:
- - "assignment_outcome"
- - "A status describing outcome of assigning patient to treatment arm"
- - "assignment_report"
- - ""
- - "true"
- - "string"
- - "Of Enumeration"
- - "COMPASSIONATE_CARE, FORMERLY_ON_ARM_DECEASED, FORMERLY_ON_ARM_OFF_TRIAL, FORMERLY_ON_ARM_PROGRESSED, NOT_ELIGIBLE, OFF_TRIAL, OFF_TRIAL_DECEASED, OFF_TRIAL_NO_TA_AVAILABLE, OFF_TRIAL_NOT_CONSENTED, ON_TREATMENT_ARM, PENDING_APPROVAL, PENDING_CONFIRMATION"
- - row:
- - "treatment_outcome"
- - "Best Confirmed Response. Source is ECOG-ACRIN."
- - "assignment_report"
- - ""
- - "false"
- - "string"
- - "Of Enumeration"
- - "Complete response, Partial response, Stable disease, Progressive disease, Not evaluable"
- - row:
- - "arm_id"
- - "A unique identifier assigned to a trial arm."
- - "arm"
- - "Treatment Arm"
- - "true"
- - "string"
- - "Of Enumeration"
- - "A, C2, E, L, T, V, Z1E, Z1G, Z1H, Z1K, Z1L, Z1J, Z1M, C1, J, K1, K2, M, Z1C, Z1F, F, G, H, R, U, S1, S2, X, Z1A, B, Z1B, Z1D, Z1I, I, N, P, Q, W, Y"
- - row:
- - "arm_target"
- - "A concise description of genomic aberrations being targeted by trial arm."
- - "arm"
- - "Treatment Arm Target"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "arm_drug"
- - "The therapeutic agent being tested against the target genomic aberrations."
- - "arm"
- - "Treatment Arm Drug"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "pubmed_id"
- - "PubMed ID of the primary publication associated with trial arm."
- - "arm"
- - "PubMed ID"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "clinical_trial_id"
- - "The Trial ID assigned to trial by ClinicalTrials.gov"
- - "clinical_trial"
- - "Trial ID"
- - "true"
- - "string"
- - "Is Unique"
- - "None"
- - row:
- - "clinical_trial_description"
- - "A brief description of clinical trial."
- - "clinical_trial"
- - "Trial Description"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "clinical_trial_designation"
- - "A concise name for clinical trial eg. NCI-MATCH"
- - "clinical_trial"
- - "Trial Code"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "clinical_trial_long_name"
- - "The complete name of the clinical trial, e.g. Targeted Therapy Directed by Genetic Testing in Treating Patients with Advanced Refractory Solid Tumors, Lymphomas, or Multiple Myeloma (The MATCH Screening Trial)."
- - "clinical_trial"
- - "Trial Name"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "clinical_trial_type"
- - "Clinical trial type"
- - "clinical_trial"
- - "Trial Type"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "lead_organization"
- - "Lead Organization"
- - "clinical_trial"
- - "Lead Organization"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "principal_investigators"
- - "Principal Investigators"
- - "clinical_trial"
- - "Principal Investigators"
- - "true"
- - "string"
- - "None"
- - "None"
- - row:
- - "allele_frequency"
- - "Frequency of alternative nucleotide as determined by number of reads carrying alternative nucleotide as a fraction of total number of reads mapping to variant position."
- - ""
- - ""
- - "true"
- - "number"
- - "allele_frequency > 0.0"
- - "None"
- - "snv_variant_of, delins_variant_of, indel_variant_of"
- - row:
- - "copy_number"
- - "The estimated number of copies for the copy number variant gene."
- - ""
- - ""
- - "true"
- - "number"
- - "copy_number >= 0"
- - "None"
- - "copy_number_variant_of"
- - row:
- - "copy_number_ci_5"
- - "The lower limit of the 95% confidence interval for the estimated copy number."
- - ""
- - ""
- - "true"
- - "number"
- - "copy_number_ci_5 >= 0"
- - "None"
- - "copy_number_variant_of"
- - row:
- - "copy_number_ci_95"
- - "The upper limit of the 95% confidence interval for the estimated copy number."
- - "copy_number_variant_of"
- - ""
- - "true"
- - "number"
- - "copy_number_ci_95 >= 0"
- - "None"
- - "copy_number_variant_of"
- - row:
- - "gene1_read_count"
- - "Number of reads mapping to the upstream gene partner."
- - ""
- - ""
- - "true"
- - "integer"
- - "gene1_read_count > 0"
- - "None"
- - "gene_fusion_variant_of"
- - row:
- - "gene2_read_count"
- - "Number of reads mapping to the downstream gene partner."
- - ""
- - ""
- - "true"
- - "integer"
- - "gene2_read_count > 0"
- - "None"
- - "gene_fusion_variant_of"
+ - paragraph: "$$#CTDC and NCI’s Cloud Resources#$$ "
+ - paragraph: "The CTDC supports analysis via the $$[Seven Bridges Cancer Genomics Cloud (SB-CGC)](https://datacommons.cancer.gov/analytical-resource/seven-bridges-cancer-genomics-cloud-developed-velsera#)$$. SB-CGC supports data access through a web-based user interface, programmatic access to analytic tools and workflows, and collaborative data analysis and sharing pipelines. Users can transfer data of interest from the CTDC directly to SB-CGC, eliminating the need to download and store extremely large data sets. Through the SB-CGC, researchers can bring analysis tools to the data in the cloud, instead of the traditional process of bringing the data to the tools on local hardware. Analyzing data through the cloud offers many benefits including: "
+ - listWithNumbers :
+ - "State-of-the-art analysis using high-performance computing"
+ - "Remote access and flexibility for nationally or globally distributed teams"
+ - "On-demand computational capacity to scale resources as needed "
+ - paragraph: "Data brought to the SB-CGC can be analyzed using more than 200 preinstalled, curated bioinformatics tools and workflows. Researchers can also extend the functionality of the platform by adding their own data and tools via an intuitive software development kit. "
+ - paragraph: "$300 in credits are available to new users who want to test out the platform. "
+ - paragraph: "For more information on getting started with SB-CGC, including onboarding videos and more, visit: $$[https://www.cancergenomicscloud.org/getting-started](https://www.cancergenomicscloud.org/getting-started)$$."
diff --git a/src/content/prod/old-ctdc.yaml b/src/content/prod/old-ctdc.yaml
new file mode 100644
index 00000000..a4dc2c6d
--- /dev/null
+++ b/src/content/prod/old-ctdc.yaml
@@ -0,0 +1,991 @@
+- page: '/submit'
+ title: "Data Submission"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CRDC.png
+ content:
+ - paragraph: "CTDC is not accepting external data submissions at this time. For more information on how to submit data to other data repositories within the Cancer Research Data Commons, please see $$[here](https://datascience.cancer.gov/data-commons)$$."
+- page: '/crdc'
+ title: "Cancer Research Data Commons (CRDC) and Analysis"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CRDC.png
+ content:
+ - paragraph: "$$#CRDC:#$$"
+ - paragraph: "$$[The Cancer Research Data Commons](https://datascience.cancer.gov/data-commons)$$ is an initiative from NCI’s Center for Biomedical
+ Informatics & Information Technology ($$[CBIIT](https://datascience.cancer.gov/)$$). CBIIT’s vision for the project consists of a
+ virtual, expandable infrastructure that provides secure access to many different data types across scientific domains,
+ allowing users to analyze, share, and store results, leveraging the storage and elastic compute, or ability to easily scale resources, of the
+ cloud. The ability to combine diverse data types and perform cross-domain analysis of large data sets can lead to new discoveries in cancer prevention,
+ treatment and diagnosis, and supports the goals of precision medicine and the Cancer Moonshot℠."
+ - paragraph: "Users can query the CTDC data via Graphical User Interface (GUI) or APIs. The GUI provides users a distilled set of parameters (faceted querying) they can use to explore the CTDC data. The APIs provide access to the full collection of parameters as seen in the $$[CTDC Data Model](https://github.com/CBIIT/ctdc-model)$$."
+ - paragraph: "$$#ANALYSIS- THE CLOUD RESOURCES:#$$"
+ - paragraph: "The CRDC has three Cloud Resources ($$[Seven Bridges Cancer Genomics Cloud](http://www.cancergenomicscloud.org/)$$, $$[ISB Cancer Genomics Cloud](https://isb-cgc.appspot.com/)$$, $$[Terra](https://terra.bio/)$$),
+ each providing analysis platforms for the community to use when working with CRDC data."
+ - paragraph: "These cloud-based platforms eliminate the need for researchers to download and store extremely large data sets by allowing them to bring
+ analysis tools to the data in the cloud, instead of the traditional process of bringing the data to the tools on local hardware. The Cloud Resources
+ also provide access to on-demand computational capacity to analyze these data. The Cloud Resources allow users to run best practice tools and pipelines
+ already implemented or upload their own data or analysis methods to workspaces."
+ - paragraph: "All three Cloud Resources provide support for data access through a web-based user interface in addition to programmatic access to analytic
+ tools and workflows, and the capability of sharing results with collaborators. Each Cloud Resource is continually developing new functionality to improve
+ the user experience and add new tools for researchers."
+ - paragraph: "$$*SEVEN BRIDGES CANCER GENOMICS CLOUD (SBG)*$$ is hosted on Amazon Web Services and has a rich user interface that allows researchers to find data of
+ interest and combine it with their own private data. Data can be analyzed using more than 200 preinstalled, curated bioinformatics tools and workflows.
+ Researchers can also extend the functionality of the platform by adding their own data and tools via an intuitive software development kit."
+ - paragraph: "$$*INSTITUTE FOR SYSTEMS BIOLOGY CANCER GENOMICS CLOUD (ISB-CGC)*$$ leveraging many aspects of the Google Cloud Platform, allows scientists to
+ interactively define and compare cohorts, examine underlying molecular data for specific genes and pathways, and share insights with collaborators.
+ For computational users, Application Program Interfaces (APIs) and Google Cloud Platform (GCP) resources such as BigQuery and Google Pipeline service,
+ allow complex queries from R or Python scripts, or Dockerized workflows to run on data available in the Google Cloud Storage."
+ - paragraph: "$$*BROAD INSTITUTE- TERRA*$$ is an open, standards-based platform for performing production-scale data analysis in the cloud. Built on the Google
+ Cloud Platform, Terra empowers analysts, tool developers, and production managers to run large-scale analysis and to share results with collaborators.
+ Users can upload their own analysis methods and data to workspaces or run the Broad’s best practice tools and pipelines."
+ - paragraph: "$$*FROM CTDC TO CLOUD RESOURCES:*$$"
+ - image: "MyCases-Wizard-Step4-SVG"
+ - paragraph: "Researchers find cases/cohorts in CTDC and then identify the files they would like to use for analysis. This list of files is called a Manifest.
+ The user will download the Manifest and then upload the Manifest into SBG where the files will be available for analysis. The user will need an SBG account.
+ The Manifest file is a text file consisting of CRDC Identifiers and, on uploading to SBG, the user will be able to access the relevant data files and see
+ some basic case information."
+ - paragraph: "This Cloud Resource analysis model eliminates the need for researchers to download and store extremely large data sets by allowing them to bring
+ analysis tools to the data in the cloud, instead of the traditional process of bringing the data to the tools on local hardware. The Cloud Resources also
+ provide access to on-demand computational capacity to analyze these data, allow users to run best practice tools and pipelines already implemented, and
+ upload their own data or analysis methods to workspaces."
+ - paragraph: "All three Cloud Resources provide support for data access through a web-based user interface in addition to programmatic access to analytic tools
+ and workflows, and the capability of sharing results with collaborators. Each Cloud Resource is continually developing new functionality to improve the user
+ experience and add new tools for researchers."
+ - paragraph: "Currently the CTDC supports analysis via the SBG Cloud Resource."
+- page: "/developers"
+ title: "Developers"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Developers.png
+ content:
+ - paragraph: "The CTDC System features a GraphQL API to enable querying of the entire data model."
+ - paragraph: "https://ctdc-dev.bento-tools.org/graphql/"
+ - paragraph: "The CTDC metadata is open to the public and will not require authentication to the system. However, some clinical data and all Data Files are not public so require permission."
+ - paragraph: "Users can query the CTDC data via Graphical User Interface (GUI) or API. The GUI provides users a distilled set of parameters (faceted querying) they can use to explore a subset of the CTDC data model. The API provides access to the full collection of parameters as seen in the $$[CTDC Data Model](https://ctdc-dev.bento-tools.org/#/ctdc-data-model)$$."
+ - paragraph: "The API is provided “as is”; there are no warranties or conditions arising out of usage of these services. "
+ - paragraph: "$$#GITHUB:#$$"
+ - paragraph: "As a resource for the public and cancer research, $$[the CTDC GitHub repo](https://github.com/CBIIT/bento-ctdc-frontend)$$ is available for research,
+ usage, forking, and pull requests. The codebase is intended for sharing and building frameworks for related initiatives
+ and projects. The CTDC GitHub repo has documentation about how to access the system, including endpoints and recommendations for
+ tools and example queries. Both the project and documentation are currently maintained and updated. "
+ - paragraph: "CTDC is based on a Graph database, and features a GraphQL API (Java) and a React front-end (JavaScript).
+ Each tier in the application stack is designed to be modular and adaptable for a variety of use-cases and scenarios. "
+- page: '/support'
+ title: "Support"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Support.png
+ content:
+ - paragraph: "If you have any questions, please contact us at $$[CTDCHelpDesk@mail.nih.gov](mailto:CTDCHelpDesk@mail.nih.gov)$$"
+- page: '/request-access'
+ title: "Request Access"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_RequestAccess.png
+ content:
+ - paragraph: "CTDC provides open access to baseline information on the cases present in CTDC. However, the detailed clinical information and associated sequencing data files for cases presented in the CTDC are not open access; they require users to have permission. Requesting permission is a two-step process:"
+ - paragraph: "STEP 1: Request access through the NCTN Data Archive site."
+ - paragraph: "STEP 2: Request access through dbGaP."
+ - paragraph: "Once access has been granted, users will be able to view and work with the CTDC data files and clinical information in one of the Cancer Research Data Commons’ analysis resources. Currently the CTDC supports analysis via the SBG Cloud Resource ($$[Seven Bridges Cancer Genomics Cloud](http://www.cancergenomicscloud.org/)$$)."
+ - paragraph: "$$#STEP 1:#$$"
+ - paragraph: "Navigate to the $$[NCTN/NCORP Data Archive](https://nctn-data-archive.nci.nih.gov)$$ site. If you do not have an NCTN account, please create one. Once registered you will be able to request access to the trial of interest. It is suggested to use either the NCT Trial Number or the PubMed ID to search for the trial of interest- each trial in CTDC has an associated NCT Trial number and PubMed ID. A $$[PDF document](https://nctn-data-archive.nci.nih.gov/sites/default/files/RequestPDF.pdf)$$ is available detailing the steps for both registration and access requesting."
+ - paragraph: "$$#STEP 2:#$$"
+ - paragraph: "Navigate to $$[dbGaP’s Authorized Access](https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login)$$ site and login. If you do not have a dbGaP account, please create one.
+ If a user is requesting access through a collaborator or their Principal Investigator (PI) who already has access, then the PI/collaborator can add the user as a “Downloader”.
+ If a user is requesting access as a PI, then they will need to create a new research project within dbGaP. The following information is useful to add the appropriate dataset to the dbGaP research project request; ARM Q Study Accession ID is phs001926, ARM Z1D Study Accession ID is phs001859."
+- page: '/purpose'
+ title: "Purpose"
+ primaryContentImage: https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_Purpose.png
+ content:
+ - paragraph: "The CTDC is an initiative from NCI’s Center for Biomedical Informatics and Information Technology (CBIIT) to expand access to data from precision medicine cancer trials to accelerate cancer research.
+ The CTDC as part of the Cancer Research Data Commons provides access to harmonized data from NCI-sponsored clinical trials, including genomic and clinical data from NCI’s $$[Molecular Analysis for Therapy Choice](https://www.cancer.gov/about-cancer/treatment/clinical-trials/nci-supported/nci-match)$$ (NCI-MATCH).
+ CTDC allows users to take advantage of NCI’s $$[Cloud Resources](https://datascience.cancer.gov/data-commons/cloud-resources)$$ for easy visualization and bioinformatic analysis of data, with no programming skills required."
+- page: '/ctdc-data-model'
+ title: 'CTDC Data Model'
+ primaryContentImage: 'https://raw.githubusercontent.com/CBIIT/datacommons-assets/ctdc_Assets/ctdc/images/aboutPages/About_CTDC_Model.png'
+ secondaryZoomImageTitle: "The CTDC Data Model"
+ secondaryZoomImage: 'https://cbiit.github.io/ctdc-model/model-desc/ctdc-model.svg'
+ content:
+ - paragraph: "$$#HARMONIZATION/INTEGRATION:#$$"
+ - paragraph: "The CTDC functions best for the research community when the data is integrated. Once a project
+ is accepted into the CTDC, the CTDC data team works with the submitting team to review the data looking
+ at data structure, data values, data quality as well as identifying any standards that were utilized.
+ Based on that review, a plan for how to submit the data will be agreed upon between CTDC and the submitter and the plan implemented. "
+ - paragraph: "$$#FAIR AND CITING:#$$"
+ - paragraph: "The CTDC will adhere to $$[FAIR](https://www.go-fair.org/fair-principles/)$$ principles of data stewardship: Findable, Accessible,
+ Interoperable, and Reusable. Please credit the CTDC in your manuscript. When citing individual projects,
+ please refer to the attribution policies of the project when available. "
+ - paragraph: "$$#LICENSE#$$"
+ - paragraph: "Data made available through the CTDC is for research purposes only. The CTDC provides researchers with
+ access to data from cancer clinical trials to enable exploratory analysis that cannot be considered definitive for
+ outcomes. All data is publicly available. "
+ - paragraph: "$$#DATA MODEL & DICTIONARY:#$$"
+ - paragraph: "The CTDC data model is a representation of how all the constituent data are arranged relative to each
+ other. Given the number of studies, the range of study types and the multiple data types that the CTDC needs
+ to support, the data model will need to adapt to the needs of the science. The data model is not static and
+ is expected to change as new needs are identified. The data dictionary can be found $$[here](https://ctdc-dev.bento-tools.org/#/data-dictionary)$$."
+ - paragraph: "The SVG graphic below represents the current CTDC data model consisting of data nodes,
+ node properties, and relationships (edges). It provides a comprehensive mapping of the system data,
+ part of which may be viewed in the application interface and UI. In other words, additional nodes and
+ properties are available for inspection and querying beyond those presented on the front-end. "
+ - paragraph: "Additionally, the CTDC Data Model serves as a template for similar initiatives and data structures,
+ including graph-based database schemas. The model will continue to evolve as data needs are further discerned. "
+ - paragraph: "The tool used to generate this visual may be sourced on Github at: https://github.com/CBIIT/ctdc-model"
+ - paragraph: "The entire CTDC data model can be queried via API: https://ctdc-dev.bento-tools.org/graphql/"
+- page: '/data-dictionary'
+ title: 'CTDC Data Dictionary '
+ content:
+ title: "CTDC Nodes, Conventions, and Data Dictionary"
+ introduction: "Clinical trials in precision medicine generate large volumes of complex biomedical data types- patient metadata, molecular variant profiles and treatment outcomes. To address this complexity CTDC has used a graph structure to model a clinical trial workflow. Key trial entities have been modelled as node types, while the association between nodes have been explicitly modelled as relationship types. There are 18 node types and 20 relationship types in the CTDC data model, these are listed in the section CTDC Node Types and Table 1, respectively. Both nodes and relationships store data attributes in the form of properties. These data attributes are listed in the section CTDC Attribute Types. The CTDC graph data model has been implemented in Neo4J, a commercial graph database platform."
+ nodeTypes:
+ - nodeTypeTitle: "clinical_trial"
+ nodeTypeDescription: "An interventional clinical research study that is represented within the CTDC, in terms of its design, data and key results."
+ nodeTypeNUmber: "1"
+ - nodeTypeTitle: "arm"
+ nodeTypeDescription: "A treatment arm tests a single therapeutic agent against a set of genomic aberrations. Each arm may accrue at most 35 patients; for arms studying more common genomic aberrations up to 70 patients may be enrolled. Each treatment arm has a set of inclusion and exclusion criteria."
+ nodeTypeNUmber: "2"
+ - nodeTypeTitle: "case"
+ nodeTypeDescription: "An individual who has consented to participate in and has registered in the NCI-MATCH clinical trial. A case may be assigned to zero or more arms."
+ nodeTypeNUmber: "3"
+ - nodeTypeTitle: "metastatic_site"
+ nodeTypeDescription: "An anatomic site, that is not the primary site of disease, in trial participant, where cancerous cells have been detected. A case may have zero or more metastatic_sites."
+ nodeTypeNUmber: "4"
+ - nodeTypeTitle: "specimen"
+ nodeTypeDescription: "A material sample extracted from a case. One or more specimens may be associated with a case. Specimens may be of type tumor or normal."
+ nodeTypeNUmber: "5"
+ - nodeTypeTitle: "ihc_assay_report"
+ nodeTypeDescription: "A report generated from immunohistochemistry (IHC) tests performed on a specimen. In general, the expression of four genes: PTEN, MLH1, MSH2 and RB, is assayed by a single IHC assay run. Only one ihc_assay_report, per gene, is associated with a specimen."
+ nodeTypeNUmber: "6"
+ - nodeTypeTitle: "nucleic_acid"
+ nodeTypeDescription: "An aliquot of nucleic acid prepared from a specimen."
+ nodeTypeNUmber: "7"
+ - nodeTypeTitle: "sequencing_assay"
+ nodeTypeDescription: "A sequencing test performed on the nucleic_acid aliquot to identify genomic abnormalities present in the specimen. A case may have multiple sequencing assays."
+ nodeTypeNUmber: "8"
+ - nodeTypeTitle: "file"
+ nodeTypeDescription: "The output(s) of a sequencing test and associated bioinformatics pipelines. These may include raw sequence files, aligned read files and files of called variants."
+ nodeTypeNUmber: "9"
+ - nodeTypeTitle: "variant_report"
+ nodeTypeDescription: "The report generated from processing the output of the sequencing_assay, listing the genomic abnormalities identified in the specimen. Only one variant_report is associated with a sequencing_assay. A case may have multiple variant_reports."
+ nodeTypeDescriptionParagraphTwo: "The variant_report lists genomic variants represented by the following CTDC node types:"
+ nodeTypeNUmber: "10"
+ nodeTypeSubList:
+ - nodeTypeSubListTitle: "snv_variant"
+ nodeTypeSubListDescription: "a sequence change where, compared to a reference sequence, one nucleotide is replaced by one nucleotide. This variant type corresponds to the substitution variant type in HGVS nomenclature. A variant_report may have zero or more snv_variants."
+ - nodeTypeSubListTitle: "delins_variant"
+ nodeTypeSubListDescription: "a sequence change where, compared to a reference sequence, one or more nucleotides are replaced by one or more other nucleotides. This variant type corresponds to the deletion-insertion variant type in HGVS nomenclature. A variant_report may have zero or more delins_variants."
+ - nodeTypeSubListTitle: "indel_variant"
+ nodeTypeSubListDescription: "a sequence change where a set of nucleotides are either inserted into or deleted from a genome sequence. This variant type includes the deletion and insertion variant types in HGVS nomenclature. A variant_report may have zero or more indel_variants."
+ - nodeTypeSubListTitle: "copy_number_variant"
+ nodeTypeSubListDescription: "a sequence change that results in the whole or partial gain or loss of copies of a gene. The MATCH clinical trial tests for copy number gain only. A variant_report may have zero or more copy_number_variants."
+ - nodeTypeSubListTitle: "gene_fusion_variant"
+ nodeTypeSubListDescription: "a chromosomal rearrangement that results in a hybrid gene. A variant_report may have zero or more gene_fusion_variants."
+ - nodeTypeTitle: "assignment_report"
+ nodeTypeDescription: "A report generated by the MATCHBox Rules Engine that summarizes the result of matching a patient’s: (a) disease and prior therapies (b) genomic variants (c) IHC test results, against the inclusion and exclusion criteria of all open treatment arms. The Assignment report lists the result of matching the patient’s data against each treatment arm. A case may have multiple assignment_reports."
+ nodeTypeNUmber: "11"
+ - nodeTypeTitle: "disease_eligibility_criterion"
+ nodeTypeDescription: "A disease diagnosis that determines a case’s participation in an arm of a clinical trial. Disease eligibility criteria may be inclusionary or exclusionary."
+ nodeTypeNUmber: "12"
+ - nodeTypeTitle: "drug_eligibility_criterion"
+ nodeTypeDescription: "A prior or current drug therapy that determines a case’s participation in an arm of a clinical trial. Drug eligibility criteria may be inclusionary or exclusionary."
+ nodeTypeNUmber: "13"
+ nodeTypeRelationShipTable:
+ - head:
+ - "Relationship Name"
+ - "Source Node"
+ - "Destination Node"
+ - "Multiplicity"
+ - body:
+ - row:
+ - "of_case"
+ - "specimen"
+ - "case"
+ - 'many_to_one'
+ - row:
+ - "of_specimen"
+ - "ihc_assay_report"
+ - "specimen"
+ - "many_to_one"
+ - row:
+ - "of_specimen"
+ - "nucleic_acid"
+ - "specimen"
+ - "many_to_one"
+ - row:
+ - "of_nucleic_acid"
+ - "sequencing_assay"
+ - "nucleic_acid"
+ - "many_to_one"
+ - row:
+ - "of_sequencing_assay"
+ - "file"
+ - "sequencing_assay"
+ - "many_to_one"
+ - row:
+ - "of_sequencing_assay"
+ - "variant_report"
+ - "sequencing_assay"
+ - "many_to_one"
+ - row:
+ - "snv_variant_of"
+ - "snv_variant"
+ - "variant_report"
+ - "many_to_many"
+ - row:
+ - "delins_variant_of"
+ - "delins_variant"
+ - "variant_report"
+ - "many_to_many"
+ - row:
+ - "indel_variant_of"
+ - "indel_variant"
+ - "variant_report"
+ - "many_to_many"
+ - row:
+ - "copy_number_variant_of"
+ - "copy_number_variant"
+ - "variant_report"
+ - "many_to_many"
+ - row:
+ - "gene_fusion_variant_of"
+ - "gene_fusion_variant"
+ - "variant_report"
+ - "many_to_many"
+ - row:
+ - "of_variant_report"
+ - "assignment_report"
+ - "variant_report"
+ - "many_to_one"
+ - row:
+ - "of_arm"
+ - "assignment_report"
+ - "arm"
+ - "many_to_one"
+ - row:
+ - "of_trial"
+ - "arm"
+ - "clinical_trial"
+ - "many_to_one"
+ - row:
+ - "of_arm"
+ - "case"
+ - "arm"
+ - "many_to_many"
+ - row:
+ - "of_specimen"
+ - "assignment_report"
+ - "specimen"
+ - "many_to_one"
+ - row:
+ - "met_site_of"
+ - "metastatic_site"
+ - "case"
+ - "many_to_many"
+ - row:
+ - "of_arm"
+ - "file"
+ - "arm"
+ - "many_to_one"
+ - row:
+ - "of_arm"
+ - "disease_eligibility_criterion"
+ - "arm"
+ - "many_to_many"
+ - row:
+ - "of_arm"
+ - "drug_eligibility_criterion"
+ - "arm"
+ - "many_to_many"
+ documentConventionsTitle: "Descriptors for Each Attribute"
+ documentConventionsDescription: "An interventional clinical research study that is represented within the CTDC, in terms of its design, data and key results. "
+ documentConventions:
+ - attributeName: "Attribute Name - "
+ attributeConventions: "name of the attribute."
+ - attributeName: "Definition - "
+ attributeConventions: "a concise description of the attribute. Attribute definitions have been developed by the CTDC data team and are specific to the CTDC data model."
+ - attributeName: "Attribute of Node/Relationship - "
+ attributeConventions: "the parent node or relationship type of attribute."
+ - attributeName: "Display Name - "
+ attributeConventions: "the display name of attribute on CTDC user interface. Display names are printed for only those attributes that are displayed in the current version of the user interface. All attributes listed below are available via CTDC’s GraphQL API."
+ - attributeName: "Required - "
+ attributeConventions: "a true/false label that indicates if attribute is a required field for CTDC data submitters."
+ - attributeName: "Type - "
+ attributeConventions: "the data type of the attribute (string, integer, number, boolean)."
+ - attributeName: "Constraints - "
+ attributeConventions: "rules enforced on attribute values."
+ - attributeName: "Enumeration - "
+ attributeConventions: "If applicable, the list of possible values for the attribute"
+ attribute:
+ - row:
+ - "case"
+ - "A unique numerical identifier assigned to each case by CTDC."
+ - "case"
+ - "Case ID"
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "gender"
+ - "Gender of case"
+ - "case"
+ - "Gender"
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "MALE, FEMALE, UNKNOWN"
+ - row:
+ - "race"
+ - "Race of case"
+ - "case"
+ - "Race"
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "AMERICAN_INDIAN_OR_ALASKA_NATIVE, ASIAN, BLACK_OR_AFRICAN_AMERICAN, NATIVE_HAWAIIAN_OR_OTHER_PACIFIC_ISLANDER, NOT_REPORTED, UNKNOWN, WHITE"
+ - row:
+ - "ethnicity"
+ - "Ethnicity of case"
+ - "case"
+ - "Ethnicity"
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "HISPANIC_OR_LATINO, NOT_HISPANIC_OR_LATINO, UNKNOWN"
+ - row:
+ - "patient_status"
+ - "Status of case within the MATCH workflow"
+ - "case"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "COMPASSIONATE_CARE, OFF_TRIAL, OFF_TRIAL_BIOPSY_EXPIRED, OFF_TRIAL_DECEASED, OFF_TRIAL_NOT_CONSENTED, OFF_TRIAL_NO_TA_AVAILABLE, OFF_TRIAL_REGISTRATION_ERROR, ON_TREATMENT_ARM, PENDING_APPROVAL, PENDING_CONFIRMATION, PTEN_ORDER_REQUESTED, REGISTRATION, REGISTRATION_OUTSIDE_ASSAY"
+ - row:
+ - "current_step"
+ - "The MATCH workflow is divided broadly into steps. Step 0 starts with a case registering and ends with the sign-out of the first assignment report. If case is assigned to a treatment arm s/he moves to step 1. If case progresses during treatment, s/he is assigned to step 2. Current step denotes the step in the workflow the case is currently at."
+ - "case"
+ - ""
+ - "true"
+ - "integer"
+ - "current_step >= 0"
+ - "None"
+ - row:
+ - "disease"
+ - "Disease condition diagnosed in a case."
+ - "case"
+ - "Diagnosis"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "ctep_category"
+ - "The patient's disease diagnosis described according to the CTEP Simplified Disease Classification."
+ - "case"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "ctep_subcategory"
+ - "A more granular mapping of the patient's disease diagnosis to the CTEP Simplified Disease Classification."
+ - "case"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "meddra_code"
+ - "MedDRA code that is assigned to patient's disease diagnosis and that maps to the CTEP subcategory."
+ - "case"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "prior_drugs"
+ - "A list of drugs prescribed to patient prior to study registration."
+ - "case"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "extent_of_disease"
+ - "Extent of disease at trial entry."
+ - "case"
+ - ""
+ - "false"
+ - "string"
+ - "Of Enumeration"
+ - "Locally advanced, Metastatic, Recurrent"
+ - row:
+ - "ecog_performance_status"
+ - "ECOG Performance Status at study entry (integer values 0 to 5 – only 0 and 1 are eligible for NCI-MATCH). Source is ECOG-ACRIN."
+ - "case"
+ - ""
+ - "false"
+ - "integer"
+ - "0<= ecog_performance_status <= 5"
+ - "None"
+ - row:
+ - "specimen_id"
+ - "A unique identifier assigned to each MATCH specimen by CTDC."
+ - "specimen"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "The type of material specimen extracted from a case."
+ - "Biopsy type."
+ - "specimen"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "Tumor, Normal, Unknown"
+ - row:
+ - "met_site_id"
+ - "A unique identifier assigned to a metastatic site by CTDC."
+ - "metastatic_site"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "metastatic_site_name"
+ - "Name of anatomic site where metastasis was detected at trial entry"
+ - "metastatic_site"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "Bone Marrow, Bone, Lung, Pleura, Liver, Kidney, Stomach, Small intestine, Large intestine, Skin, Orbit, Conjunctiva, Parotid, Submandibular, Testicle, Epidural, CNS-Brain, CNS-Leptom, Pericardium, Lymph Nodes, Adrenals, Other"
+ - row:
+ - "aliquot_id"
+ - "A unique identifier assigned to an aliquot of nucleic acid, prepared from a specimen, by CTDC."
+ - "nucleic_acid"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "nucleic_acid_concentration"
+ - "Concentration of nucleic acid aliquot extracted from a specimen."
+ - "nucleic_acid"
+ - ""
+ - "true"
+ - "number"
+ - "nucleic_acid_concentration > 0.0"
+ - "None"
+ - row:
+ - "nucleic_acid_volume"
+ - "Volume of nucleic acid aliquot extracted from a specimen."
+ - "nucleic_acid"
+ - ""
+ - "true"
+ - "number"
+ - "nucleic_acid_volume > 0.0"
+ - "None"
+ - row:
+ - "nucleic_acid_type"
+ - "Nucleic acid type of extract."
+ - "nucleic_acid"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "DNA, RNA, Pooled DNA/cDNA, Unknown"
+ - row:
+ - "ihc_assay_id"
+ - "A unique identifier assigned to each IHC assay report by CTDC."
+ - "ihc_assay_report"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "ihc_test_gene"
+ - "Gene symbol of gene being assayed for expression by an immunohistochemistry (IHC) test.
+ Attribute of Node: ihc_assay_report"
+ - "ihc_assay_report"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "PTEN, RB, MLH1, MSH2"
+ - row:
+ - "ihc_test_result"
+ - "The result of the IHC test performed on gene"
+ - "ihc_assay_report"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "EXPRESSED, LOST, INDETERMINATE, UNKNOWN"
+ - row:
+ - "qc_result"
+ - "Sequencing QC result"
+ - "sequencing_assay"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "variant_report_id"
+ - "A unique identifier assigned to each variant report by CTDC."
+ - "variant_report"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "mapd"
+ - "Median Absolute Percentage Deviation (MAPD) is a measure of the accuracy of the copy number calls made by the copy number variant pipeline"
+ - "variant_report"
+ - ""
+ - "false"
+ - "number"
+ - "0.0 < mapd <= 0.5"
+ - "None"
+ - row:
+ - "cellularity"
+ - "The fraction of specimen estimated to be comprised of tumor cells."
+ - "variant_report"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "file_description"
+ - "Optional description of the file and its content."
+ - "file"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "file_format"
+ - "The specific format of the file as derived by the loader"
+ - "file"
+ - ""
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "file_location"
+ - "S3 bucket location of file in CTDC."
+ - "file"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "file_name"
+ - "File name as assigned by parent project"
+ - "file"
+ - "File Name"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "file_size"
+ - "Size of the file as derived by the loader"
+ - "file"
+ - "Size"
+ - "true"
+ - "number"
+ - "None"
+ - "None"
+ - row:
+ - "file_type"
+ - "Indicator as to the nature of the file in terms of its content."
+ - "file"
+ - "File Type"
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "Raw reads file, Aligned DNA reads file, Aligned RNA reads file, Index file, Variants file"
+ - row:
+ - "md5sum"
+ - "A unique id assigned by CTDC."
+ - "file"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "uuid"
+ - "A unique id assigned by CTDC."
+ - "file"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "variant_id"
+ - "A unique identifier assigned to each variant by CTDC."
+ - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "external_variant_id"
+ - "The COSMIC ID of variant, if available, or another source project assigned ID."
+ - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
+ - ""
+ - "false"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "gene"
+ - "Gene symbol of gene that has been identified as having a variant number of copies."
+ - "snv_variant, delins_variant, indel_variant, copy_number_variant"
+ - ""
+ - "true"
+ - "string"
+ - "valid gene name"
+ - "None"
+ - row:
+ - "chromosome"
+ - "The chromosome that encodes variant gene."
+ - "snv_variant, delins_variant, indel_variant, copy_number_variant"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22, chrX, chrY"
+ - row:
+ - "exon"
+ - "The exon that encodes the variant nucleotide position."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "integer"
+ - "exon > 0"
+ - "None"
+ - row:
+ - "position"
+ - "The chromosomal location of nucleotide variant on the positive strand."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "integer"
+ - "position > 0"
+ - "None"
+ - row:
+ - "reference"
+ - "Reference allele at variant position"
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "string"
+ - "string characters in (A, T, G, C, -)"
+ - "None"
+ - row:
+ - "alternative"
+ - "Alternative nucleotide identified at variant position."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "string"
+ - "string characters in (A, T, G, C, -)"
+ - "None"
+ - row:
+ - "transcript_id"
+ - "NCBI Identifier of the transcript isoform chosen for reporting the variant."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "transcript_hgvs"
+ - "The transcript level change annotated in the HGVS format."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "genomic_hgvs"
+ - "The genomic change annotated in the HGVS format."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "false"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "oncomine_variant_class"
+ - "Variant classification label assigned by the variant calling workflow of the Ion Reporter Software."
+ - "snv_variant, delins_variant, indel_variant, copy_number_variant, gene_fusion_variant"
+ - ""
+ - "false"
+ - "string"
+ - "Of Enumeration"
+ - "Amplification, Deletion, Deleterious, Hotspot, Fusion"
+ - row:
+ - "variant_classification"
+ - "Variant classification based on the effect of genomic change."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "amino_acid_change"
+ - "The amino acid change resulting from the nucleotide-level change."
+ - "snv_variant, delins_variant, indel_variant"
+ - ""
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "tumor_suppressor"
+ - "A flag that indicates if variant gene is a tumor suppressor."
+ - "copy_number_variant"
+ - ""
+ - "false"
+ - "boolean"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "gene1"
+ - "Gene symbol of upstream partner of a fusion variant."
+ - "gene_fusion_variant"
+ - ""
+ - "true"
+ - "string"
+ - "valid gene name"
+ - "None"
+ - row:
+ - "gene2"
+ - "Gene symbol of downstream partner of a fusion variant."
+ - "gene_fusion_variant"
+ - ""
+ - "true"
+ - "string"
+ - "valid gene name"
+ - "None"
+ - row:
+ - "assignment_report_id"
+ - "A unique identifier assigned to each assignment report by CTDC"
+ - "assignment_report"
+ - ""
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "step_at_assignment"
+ - "The step in the MATCH workflow at which patient has been assigned to treatment arm."
+ - "assignment_report"
+ - ""
+ - "false"
+ - "integer"
+ - "step_at_assignment > 0"
+ - "None"
+ - row:
+ - "assignment_logic"
+ - "The rationale for assigning treatment arm."
+ - "assignment_report"
+ - ""
+ - "false"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "assignment_outcome"
+ - "A status describing outcome of assigning patient to treatment arm"
+ - "assignment_report"
+ - ""
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "COMPASSIONATE_CARE, FORMERLY_ON_ARM_DECEASED, FORMERLY_ON_ARM_OFF_TRIAL, FORMERLY_ON_ARM_PROGRESSED, NOT_ELIGIBLE, OFF_TRIAL, OFF_TRIAL_DECEASED, OFF_TRIAL_NO_TA_AVAILABLE, OFF_TRIAL_NOT_CONSENTED, ON_TREATMENT_ARM, PENDING_APPROVAL, PENDING_CONFIRMATION"
+ - row:
+ - "treatment_outcome"
+ - "Best Confirmed Response. Source is ECOG-ACRIN."
+ - "assignment_report"
+ - ""
+ - "false"
+ - "string"
+ - "Of Enumeration"
+ - "Complete response, Partial response, Stable disease, Progressive disease, Not evaluable"
+ - row:
+ - "arm_id"
+ - "A unique identifier assigned to a trial arm."
+ - "arm"
+ - "Treatment Arm"
+ - "true"
+ - "string"
+ - "Of Enumeration"
+ - "A, C2, E, L, T, V, Z1E, Z1G, Z1H, Z1K, Z1L, Z1J, Z1M, C1, J, K1, K2, M, Z1C, Z1F, F, G, H, R, U, S1, S2, X, Z1A, B, Z1B, Z1D, Z1I, I, N, P, Q, W, Y"
+ - row:
+ - "arm_target"
+ - "A concise description of genomic aberrations being targeted by trial arm."
+ - "arm"
+ - "Treatment Arm Target"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "arm_drug"
+ - "The therapeutic agent being tested against the target genomic aberrations."
+ - "arm"
+ - "Treatment Arm Drug"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "pubmed_id"
+ - "PubMed ID of the primary publication associated with trial arm."
+ - "arm"
+ - "PubMed ID"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "clinical_trial_id"
+ - "The Trial ID assigned to trial by ClinicalTrials.gov"
+ - "clinical_trial"
+ - "Trial ID"
+ - "true"
+ - "string"
+ - "Is Unique"
+ - "None"
+ - row:
+ - "clinical_trial_description"
+ - "A brief description of clinical trial."
+ - "clinical_trial"
+ - "Trial Description"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "clinical_trial_designation"
+ - "A concise name for clinical trial, e.g. NCI-MATCH"
+ - "clinical_trial"
+ - "Trial Code"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "clinical_trial_long_name"
+ - "The complete name of the clinical trial, e.g. Targeted Therapy Directed by Genetic Testing in Treating Patients with Advanced Refractory Solid Tumors, Lymphomas, or Multiple Myeloma (The MATCH Screening Trial)."
+ - "clinical_trial"
+ - "Trial Name"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "clinical_trial_type"
+ - "Clinical trial type"
+ - "clinical_trial"
+ - "Trial Type"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "lead_organization"
+ - "Lead Organization"
+ - "clinical_trial"
+ - "Lead Organization"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "principal_investigators"
+ - "Principal Investigators"
+ - "clinical_trial"
+ - "Principal Investigators"
+ - "true"
+ - "string"
+ - "None"
+ - "None"
+ - row:
+ - "allele_frequency"
+ - "Frequency of alternative nucleotide as determined by number of reads carrying alternative nucleotide as a fraction of total number of reads mapping to variant position."
+ - ""
+ - ""
+ - "true"
+ - "number"
+ - "allele_frequency > 0.0"
+ - "None"
+ - "snv_variant_of, delins_variant_of, indel_variant_of"
+ - row:
+ - "copy_number"
+ - "The estimated number of copies for the copy number variant gene."
+ - ""
+ - ""
+ - "true"
+ - "number"
+ - "copy_number >= 0"
+ - "None"
+ - "copy_number_variant_of"
+ - row:
+ - "copy_number_ci_5"
+ - "The lower limit of the 95% confidence interval for the estimated copy number."
+ - ""
+ - ""
+ - "true"
+ - "number"
+ - "copy_number_ci_5 >= 0"
+ - "None"
+ - "copy_number_variant_of"
+ - row:
+ - "copy_number_ci_95"
+ - "The upper limit of the 95% confidence interval for the estimated copy number."
+ - ""
+ - ""
+ - "true"
+ - "number"
+ - "copy_number_ci_95 >= 0"
+ - "None"
+ - "copy_number_variant_of"
+ - row:
+ - "gene1_read_count"
+ - "Number of reads mapping to the upstream gene partner."
+ - ""
+ - ""
+ - "true"
+ - "integer"
+ - "gene1_read_count > 0"
+ - "None"
+ - "gene_fusion_variant_of"
+ - row:
+ - "gene2_read_count"
+ - "Number of reads mapping to the downstream gene partner."
+ - ""
+ - ""
+ - "true"
+ - "integer"
+ - "gene2_read_count > 0"
+ - "None"
+ - "gene_fusion_variant_of"